// Copyright (c) 2019 Ericsson Eurolab Deutschland GmbH
//
// SPDX-License-Identifier: Apache-2.0
//

package virtcontainers

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"net"
	"net/http"
	"os"
	"os/exec"
	"path/filepath"
	"strconv"
	"strings"
	"syscall"
	"time"

	persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api"
	chclient "github.com/kata-containers/runtime/virtcontainers/pkg/cloud-hypervisor/client"
	"github.com/opencontainers/selinux/go-selinux/label"
	opentracing "github.com/opentracing/opentracing-go"
	"github.com/pkg/errors"
	log "github.com/sirupsen/logrus"

	"github.com/kata-containers/runtime/virtcontainers/device/config"
	"github.com/kata-containers/runtime/virtcontainers/types"
	"github.com/kata-containers/runtime/virtcontainers/utils"
)

//
// Constants and type definitions related to cloud hypervisor
//

// clhState is the driver-side readiness flag of the hypervisor: it is set to
// clhNotReady when the sandbox is created or reset and to clhReady once
// startSandbox has booted the VM.
type clhState uint8

const (
	clhNotReady clhState = iota
	clhReady
)

// VM state names.
// NOTE(review): presumably the state strings reported by the cloud-hypervisor
// API; they are not referenced in the part of the file visible here - confirm.
const (
	clhStateCreated = "Created"
	clhStateRunning = "Running"
)

const (
	// Values are mandatory by http API
	// Values based on:
	// NOTE(review): the reference the line above was meant to point to is
	// missing from the original comment.

	// Seconds to wait for the VMM API to answer after launching the process.
	clhTimeout = 10
	// Seconds allowed for a regular API request.
	clhAPITimeout = 1
	// Timeout for hot-plug - hotplug devices can take more time, than usual API calls
	// Use longer time timeout for it.
	clhHotPlugAPITimeout = 5
	// Seconds allowed for the VMM to shut down before it is killed.
	clhStopSandboxTimeout = 3
	// File names of the per-sandbox sockets (hybrid vsock, HTTP API, virtiofsd).
	clhSocket      = "clh.sock"
	clhAPISocket   = "clh-api.sock"
	virtioFsSocket = "virtiofsd.sock"
	// Oldest cloud-hypervisor version (major.minor) accepted by checkVersion.
	supportedMajorVersion = 0
	supportedMinorVersion = 5
	// Binary used when the configuration does not provide a hypervisor path.
	defaultClhPath      = "/usr/local/bin/cloud-hypervisor"
	virtioFsCacheAlways = "always"
)

// Interface that hides the implementation of openAPI client
// If the client changes its methods, this interface should do it as well,
// The main purpose is to hide the client in an interface to allow mock testing.
73 // This is an interface that has to match with OpenAPI CLH client 74 type clhClient interface { 75 // Check for the REST API availability 76 VmmPingGet(ctx context.Context) (chclient.VmmPingResponse, *http.Response, error) 77 // Shut the VMM down 78 ShutdownVMM(ctx context.Context) (*http.Response, error) 79 // Create the VM 80 CreateVM(ctx context.Context, vmConfig chclient.VmConfig) (*http.Response, error) 81 // Dump the VM information 82 // No lint: golint suggest to rename to VMInfoGet. 83 VmInfoGet(ctx context.Context) (chclient.VmInfo, *http.Response, error) //nolint:golint 84 // Boot the VM 85 BootVM(ctx context.Context) (*http.Response, error) 86 // Add/remove CPUs to/from the VM 87 VmResizePut(ctx context.Context, vmResize chclient.VmResize) (*http.Response, error) 88 // Add VFIO PCI device to the VM 89 VmAddDevicePut(ctx context.Context, vmAddDevice chclient.VmAddDevice) (*http.Response, error) 90 // Add a new disk device to the VM 91 VmAddDiskPut(ctx context.Context, diskConfig chclient.DiskConfig) (*http.Response, error) 92 } 93 94 type CloudHypervisorVersion struct { 95 Major int 96 Minor int 97 Revision int 98 } 99 100 // 101 // Cloud hypervisor state 102 // 103 type CloudHypervisorState struct { 104 state clhState 105 PID int 106 VirtiofsdPID int 107 apiSocket string 108 } 109 110 func (s *CloudHypervisorState) reset() { 111 s.PID = 0 112 s.VirtiofsdPID = 0 113 s.state = clhNotReady 114 } 115 116 type cloudHypervisor struct { 117 id string 118 state CloudHypervisorState 119 config HypervisorConfig 120 ctx context.Context 121 APIClient clhClient 122 version CloudHypervisorVersion 123 vmconfig chclient.VmConfig 124 virtiofsd Virtiofsd 125 store persistapi.PersistDriver 126 } 127 128 var clhKernelParams = []Param{ 129 130 {"root", "/dev/pmem0p1"}, 131 {"panic", "1"}, // upon kernel panic wait 1 second before reboot 132 {"no_timer_check", ""}, // do not check broken timer IRQ resources 133 {"noreplace-smp", ""}, // do not replace SMP instructions 134 
{"rootflags", "data=ordered,errors=remount-ro ro"}, // mount the root filesystem as readonly 135 {"rootfstype", "ext4"}, 136 } 137 138 var clhDebugKernelParams = []Param{ 139 140 {"console", "ttyS0,115200n8"}, // enable serial console 141 {"systemd.log_target", "console"}, // send loggng to the console 142 } 143 144 //########################################################### 145 // 146 // hypervisor interface implementation for cloud-hypervisor 147 // 148 //########################################################### 149 150 func (clh *cloudHypervisor) checkVersion() error { 151 if clh.version.Major < supportedMajorVersion || (clh.version.Major == supportedMajorVersion && clh.version.Minor < supportedMinorVersion) { 152 errorMessage := fmt.Sprintf("Unsupported version: cloud-hypervisor %d.%d not supported by this driver version (%d.%d)", 153 clh.version.Major, 154 clh.version.Minor, 155 supportedMajorVersion, 156 supportedMinorVersion) 157 return errors.New(errorMessage) 158 } 159 return nil 160 } 161 162 // For cloudHypervisor this call only sets the internal structure up. 163 // The VM will be created and started through startSandbox(). 
164 func (clh *cloudHypervisor) createSandbox(ctx context.Context, id string, networkNS NetworkNamespace, hypervisorConfig *HypervisorConfig, stateful bool) error { 165 clh.ctx = ctx 166 167 span, _ := clh.trace("createSandbox") 168 defer span.Finish() 169 170 err := hypervisorConfig.valid() 171 if err != nil { 172 return err 173 } 174 175 clh.id = id 176 clh.config = *hypervisorConfig 177 clh.state.state = clhNotReady 178 179 // version check only applicable to 'cloud-hypervisor' executable 180 clhPath, perr := clh.clhPath() 181 if perr != nil { 182 return perr 183 184 } 185 if strings.HasSuffix(clhPath, "cloud-hypervisor") { 186 err = clh.getAvailableVersion() 187 if err != nil { 188 return err 189 190 } 191 192 if err := clh.checkVersion(); err != nil { 193 return err 194 } 195 196 } 197 198 clh.Logger().WithField("function", "createSandbox").Info("creating Sandbox") 199 200 virtiofsdSocketPath, err := clh.virtioFsSocketPath(clh.id) 201 if err != nil { 202 return nil 203 204 } 205 206 if clh.state.PID > 0 { 207 clh.Logger().WithField("function", "createSandbox").Info("Sandbox already exist, loading from state") 208 clh.virtiofsd = &virtiofsd{ 209 PID: clh.state.VirtiofsdPID, 210 sourcePath: filepath.Join(kataHostSharedDir(), clh.id), 211 debug: clh.config.Debug, 212 socketPath: virtiofsdSocketPath, 213 } 214 return nil 215 } 216 217 // No need to return an error from there since there might be nothing 218 // to fetch if this is the first time the hypervisor is created. 
219 clh.Logger().WithField("function", "createSandbox").WithError(err).Info("Sandbox not found creating ") 220 221 // Set initial memomory size of the virtual machine 222 // Convert to int64 openApiClient only support int64 223 clh.vmconfig.Memory.Size = int64((utils.MemUnit(clh.config.MemorySize) * utils.MiB).ToBytes()) 224 clh.vmconfig.Memory.File = "/dev/shm" 225 // shared memory should be enabled if using vhost-user(kata uses virtiofsd) 226 clh.vmconfig.Memory.Shared = true 227 hostMemKb, err := getHostMemorySizeKb(procMemInfo) 228 if err != nil { 229 return nil 230 } 231 232 // OpenAPI only supports int64 values 233 clh.vmconfig.Memory.HotplugSize = int64((utils.MemUnit(hostMemKb) * utils.KiB).ToBytes()) 234 // Set initial amount of cpu's for the virtual machine 235 clh.vmconfig.Cpus = chclient.CpusConfig{ 236 // cast to int32, as openAPI has a limitation that it does not support unsigned values 237 BootVcpus: int32(clh.config.NumVCPUs), 238 MaxVcpus: int32(clh.config.DefaultMaxVCPUs), 239 } 240 241 // Add the kernel path 242 kernelPath, err := clh.config.KernelAssetPath() 243 if err != nil { 244 return err 245 } 246 clh.vmconfig.Kernel = chclient.KernelConfig{ 247 Path: kernelPath, 248 } 249 250 // First take the default parameters defined by this driver 251 params := clhKernelParams 252 253 // Followed by extra debug parameters if debug enabled in configuration file 254 if clh.config.Debug { 255 params = append(params, clhDebugKernelParams...) 256 } 257 258 // Followed by extra debug parameters defined in the configuration file 259 params = append(params, clh.config.KernelParams...) 
260 261 clh.vmconfig.Cmdline.Args = kernelParamsToString(params) 262 263 // set random device generator to hypervisor 264 clh.vmconfig.Rng = chclient.RngConfig{ 265 Src: clh.config.EntropySource, 266 } 267 268 // set the initial root/boot disk of hypervisor 269 imagePath, err := clh.config.ImageAssetPath() 270 if err != nil { 271 return err 272 } 273 274 if imagePath == "" { 275 return errors.New("image path is empty") 276 } 277 278 pmem := chclient.PmemConfig{ 279 File: imagePath, 280 DiscardWrites: true, 281 } 282 clh.vmconfig.Pmem = append(clh.vmconfig.Pmem, pmem) 283 284 // set the serial console to the cloud hypervisor 285 if clh.config.Debug { 286 clh.vmconfig.Serial = chclient.ConsoleConfig{ 287 Mode: cctTTY, 288 } 289 290 } else { 291 clh.vmconfig.Serial = chclient.ConsoleConfig{ 292 Mode: cctNULL, 293 } 294 } 295 296 clh.vmconfig.Console = chclient.ConsoleConfig{ 297 Mode: cctOFF, 298 } 299 300 // Overwrite the default value of HTTP API socket path for cloud hypervisor 301 apiSocketPath, err := clh.apiSocketPath(id) 302 if err != nil { 303 clh.Logger().Info("Invalid api socket path for cloud-hypervisor") 304 return nil 305 } 306 clh.state.apiSocket = apiSocketPath 307 308 clh.virtiofsd = &virtiofsd{ 309 path: clh.config.VirtioFSDaemon, 310 sourcePath: filepath.Join(kataHostSharedDir(), clh.id), 311 socketPath: virtiofsdSocketPath, 312 extraArgs: clh.config.VirtioFSExtraArgs, 313 debug: clh.config.Debug, 314 cache: clh.config.VirtioFSCache, 315 } 316 317 return nil 318 } 319 320 // startSandbox will start the VMM and boot the virtual machine for the given sandbox. 
321 func (clh *cloudHypervisor) startSandbox(timeout int) error { 322 span, _ := clh.trace("startSandbox") 323 defer span.Finish() 324 325 ctx, cancel := context.WithTimeout(context.Background(), clhAPITimeout*time.Second) 326 defer cancel() 327 328 clh.Logger().WithField("function", "startSandbox").Info("starting Sandbox") 329 330 vmPath := filepath.Join(clh.store.RunVMStoragePath(), clh.id) 331 err := os.MkdirAll(vmPath, DirMode) 332 if err != nil { 333 return err 334 } 335 336 if clh.virtiofsd == nil { 337 return errors.New("Missing virtiofsd configuration") 338 } 339 340 // This needs to be done as late as possible, just before launching 341 // virtiofsd are executed by kata-runtime after this call, run with 342 // the SELinux label. If these processes require privileged, we do 343 // notwant to run them under confinement. 344 if err := label.SetProcessLabel(clh.config.SELinuxProcessLabel); err != nil { 345 return err 346 } 347 defer label.SetProcessLabel("") 348 349 if clh.config.SharedFS == config.VirtioFS { 350 clh.Logger().WithField("function", "startSandbox").Info("Starting virtiofsd") 351 pid, err := clh.virtiofsd.Start(ctx) 352 if err != nil { 353 return err 354 } 355 clh.state.VirtiofsdPID = pid 356 } else { 357 return errors.New("cloud-hypervisor only supports virtio based file sharing") 358 } 359 360 var strErr string 361 strErr, pid, err := clh.LaunchClh() 362 if err != nil { 363 if shutdownErr := clh.virtiofsd.Stop(); shutdownErr != nil { 364 clh.Logger().WithField("error", shutdownErr).Warn("error shutting down Virtiofsd") 365 } 366 return fmt.Errorf("failed to launch cloud-hypervisor: %q, hypervisor output:\n%s", err, strErr) 367 } 368 clh.state.PID = pid 369 370 if err := clh.bootVM(ctx); err != nil { 371 return err 372 } 373 374 clh.state.state = clhReady 375 return nil 376 } 377 378 // getSandboxConsole builds the path of the console where we can read 379 // logs coming from the sandbox. 
380 func (clh *cloudHypervisor) getSandboxConsole(id string) (string, error) { 381 clh.Logger().WithField("function", "getSandboxConsole").WithField("id", id).Info("Get Sandbox Console") 382 return "", nil 383 } 384 385 func (clh *cloudHypervisor) disconnect() { 386 clh.Logger().WithField("function", "disconnect").Info("Disconnecting Sandbox Console") 387 } 388 389 func (clh *cloudHypervisor) getThreadIDs() (vcpuThreadIDs, error) { 390 391 clh.Logger().WithField("function", "getThreadIDs").Info("get thread ID's") 392 393 var vcpuInfo vcpuThreadIDs 394 395 vcpuInfo.vcpus = make(map[int]int) 396 397 return vcpuInfo, nil 398 } 399 400 func (clh *cloudHypervisor) hotplugBlockDevice(drive *config.BlockDrive) error { 401 if clh.config.BlockDeviceDriver != config.VirtioBlock { 402 return fmt.Errorf("incorrect hypervisor configuration on 'block_device_driver':"+ 403 " using '%v' but only support '%v'", clh.config.BlockDeviceDriver, config.VirtioBlock) 404 } 405 406 cl := clh.client() 407 ctx, cancel := context.WithTimeout(context.Background(), clhHotPlugAPITimeout*time.Second) 408 defer cancel() 409 410 _, _, err := cl.VmmPingGet(ctx) 411 if err != nil { 412 return openAPIClientError(err) 413 } 414 415 if drive.Pmem { 416 err = fmt.Errorf("pmem device hotplug not supported") 417 } else { 418 blkDevice := chclient.DiskConfig{ 419 Path: drive.File, 420 Readonly: drive.ReadOnly, 421 VhostUser: false, 422 } 423 _, err = cl.VmAddDiskPut(ctx, blkDevice) 424 } 425 426 if err != nil { 427 err = fmt.Errorf("failed to hotplug block device %+v %s", drive, openAPIClientError(err)) 428 } 429 return err 430 } 431 432 func (clh *cloudHypervisor) hotPlugVFIODevice(device config.VFIODev) error { 433 cl := clh.client() 434 ctx, cancel := context.WithTimeout(context.Background(), clhHotPlugAPITimeout*time.Second) 435 defer cancel() 436 437 _, _, err := cl.VmmPingGet(ctx) 438 if err != nil { 439 return openAPIClientError(err) 440 } 441 442 _, err = cl.VmAddDevicePut(ctx, 
chclient.VmAddDevice{Path: device.SysfsDev}) 443 if err != nil { 444 err = fmt.Errorf("Failed to hotplug device %+v %s", device, openAPIClientError(err)) 445 } 446 return err 447 } 448 449 func (clh *cloudHypervisor) hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error) { 450 span, _ := clh.trace("hotplugAddDevice") 451 defer span.Finish() 452 453 switch devType { 454 case blockDev: 455 drive := devInfo.(*config.BlockDrive) 456 return nil, clh.hotplugBlockDevice(drive) 457 case vfioDev: 458 device := devInfo.(*config.VFIODev) 459 return nil, clh.hotPlugVFIODevice(*device) 460 default: 461 return nil, fmt.Errorf("cannot hotplug device: unsupported device type '%v'", devType) 462 } 463 464 } 465 466 func (clh *cloudHypervisor) hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error) { 467 clh.Logger().WithField("function", "hotplugRemoveDevice").Warn("hotplug remove device not supported") 468 return nil, nil 469 } 470 471 func (clh *cloudHypervisor) hypervisorConfig() HypervisorConfig { 472 return clh.config 473 } 474 475 func (clh *cloudHypervisor) resizeMemory(reqMemMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, memoryDevice, error) { 476 477 // TODO: Add support for virtio-mem 478 479 if probe { 480 return 0, memoryDevice{}, errors.New("probe memory is not supported for cloud-hypervisor") 481 } 482 483 if reqMemMB == 0 { 484 // This is a corner case if requested to resize to 0 means something went really wrong. 
485 return 0, memoryDevice{}, errors.New("Can not resize memory to 0") 486 } 487 488 info, err := clh.vmInfo() 489 if err != nil { 490 return 0, memoryDevice{}, err 491 } 492 493 currentMem := utils.MemUnit(info.Config.Memory.Size) * utils.Byte 494 newMem := utils.MemUnit(reqMemMB) * utils.MiB 495 496 // Early check to verify if boot memory is the same as requested 497 if currentMem == newMem { 498 clh.Logger().WithField("memory", reqMemMB).Debugf("VM already has requested memory") 499 return uint32(currentMem.ToMiB()), memoryDevice{}, nil 500 } 501 502 if currentMem > newMem { 503 clh.Logger().Warn("Remove memory is not supported, nothing to do") 504 return uint32(currentMem.ToMiB()), memoryDevice{}, nil 505 } 506 507 blockSize := utils.MemUnit(memoryBlockSizeMB) * utils.MiB 508 hotplugSize := (newMem - currentMem).AlignMem(blockSize) 509 510 // Update memory request to increase memory aligned block 511 alignedRequest := currentMem + hotplugSize 512 if newMem != alignedRequest { 513 clh.Logger().WithFields(log.Fields{"request": newMem, "aligned-request": alignedRequest}).Debug("aligning VM memory request") 514 newMem = alignedRequest 515 } 516 517 // Check if memory is the same as requested, a second check is done 518 // to consider the memory request now that is updated to be memory aligned 519 if currentMem == newMem { 520 clh.Logger().WithFields(log.Fields{"current-memory": currentMem, "new-memory": newMem}).Debug("VM already has requested memory(after alignment)") 521 return uint32(currentMem.ToMiB()), memoryDevice{}, nil 522 } 523 524 cl := clh.client() 525 ctx, cancelResize := context.WithTimeout(context.Background(), clhAPITimeout*time.Second) 526 defer cancelResize() 527 528 // OpenApi does not support uint64, convert to int64 529 resize := chclient.VmResize{DesiredRam: int64(newMem.ToBytes())} 530 clh.Logger().WithFields(log.Fields{"current-memory": currentMem, "new-memory": newMem}).Debug("updating VM memory") 531 if _, err = cl.VmResizePut(ctx, resize); 
err != nil { 532 clh.Logger().WithFields(log.Fields{"current-memory": currentMem, "new-memory": newMem}).Warnf("failed to update memory %s", openAPIClientError(err)) 533 err = fmt.Errorf("Failed to resize memory from %d to %d: %s", currentMem, newMem, openAPIClientError(err)) 534 return uint32(currentMem.ToMiB()), memoryDevice{}, openAPIClientError(err) 535 } 536 537 return uint32(newMem.ToMiB()), memoryDevice{sizeMB: int(hotplugSize.ToMiB())}, nil 538 } 539 540 func (clh *cloudHypervisor) resizeVCPUs(reqVCPUs uint32) (currentVCPUs uint32, newVCPUs uint32, err error) { 541 cl := clh.client() 542 543 // Retrieve the number of current vCPUs via HTTP API 544 info, err := clh.vmInfo() 545 if err != nil { 546 clh.Logger().WithField("function", "resizeVCPUs").WithError(err).Info("[clh] vmInfo failed") 547 return 0, 0, openAPIClientError(err) 548 } 549 550 currentVCPUs = uint32(info.Config.Cpus.BootVcpus) 551 newVCPUs = currentVCPUs 552 553 // Sanity check 554 if reqVCPUs == 0 { 555 clh.Logger().WithField("function", "resizeVCPUs").Debugf("Cannot resize vCPU to 0") 556 return currentVCPUs, newVCPUs, fmt.Errorf("Cannot resize vCPU to 0") 557 } 558 if reqVCPUs > uint32(info.Config.Cpus.MaxVcpus) { 559 clh.Logger().WithFields(log.Fields{ 560 "function": "resizeVCPUs", 561 "reqVCPUs": reqVCPUs, 562 "clhMaxVCPUs": info.Config.Cpus.MaxVcpus, 563 }).Warn("exceeding the 'clhMaxVCPUs' (resizing to 'clhMaxVCPUs')") 564 565 reqVCPUs = uint32(info.Config.Cpus.MaxVcpus) 566 } 567 568 // Resize (hot-plug) vCPUs via HTTP API 569 ctx, cancel := context.WithTimeout(context.Background(), clhAPITimeout*time.Second) 570 defer cancel() 571 if _, err = cl.VmResizePut(ctx, chclient.VmResize{DesiredVcpus: int32(reqVCPUs)}); err != nil { 572 return currentVCPUs, newVCPUs, errors.Wrap(err, "[clh] VmResizePut failed") 573 } 574 575 newVCPUs = reqVCPUs 576 577 return currentVCPUs, newVCPUs, nil 578 } 579 580 func (clh *cloudHypervisor) cleanup() error { 581 clh.Logger().WithField("function", 
"cleanup").Info("cleanup") 582 return nil 583 } 584 585 func (clh *cloudHypervisor) pauseSandbox() error { 586 clh.Logger().WithField("function", "pauseSandbox").Info("Pause Sandbox") 587 return nil 588 } 589 590 func (clh *cloudHypervisor) saveSandbox() error { 591 clh.Logger().WithField("function", "saveSandboxC").Info("Save Sandbox") 592 return nil 593 } 594 595 func (clh *cloudHypervisor) resumeSandbox() error { 596 clh.Logger().WithField("function", "resumeSandbox").Info("Resume Sandbox") 597 return nil 598 } 599 600 // stopSandbox will stop the Sandbox's VM. 601 func (clh *cloudHypervisor) stopSandbox() (err error) { 602 span, _ := clh.trace("stopSandbox") 603 defer span.Finish() 604 clh.Logger().WithField("function", "stopSandbox").Info("Stop Sandbox") 605 return clh.terminate() 606 } 607 608 func (clh *cloudHypervisor) fromGrpc(ctx context.Context, hypervisorConfig *HypervisorConfig, j []byte) error { 609 return errors.New("cloudHypervisor is not supported by VM cache") 610 } 611 612 func (clh *cloudHypervisor) toGrpc() ([]byte, error) { 613 return nil, errors.New("cloudHypervisor is not supported by VM cache") 614 } 615 616 func (clh *cloudHypervisor) save() (s persistapi.HypervisorState) { 617 s.Pid = clh.state.PID 618 s.Type = string(ClhHypervisor) 619 s.VirtiofsdPid = clh.state.VirtiofsdPID 620 s.APISocket = clh.state.apiSocket 621 return 622 } 623 624 func (clh *cloudHypervisor) load(s persistapi.HypervisorState) { 625 clh.state.PID = s.Pid 626 clh.state.VirtiofsdPID = s.VirtiofsdPid 627 clh.state.apiSocket = s.APISocket 628 } 629 630 func (clh *cloudHypervisor) check() error { 631 cl := clh.client() 632 ctx, cancel := context.WithTimeout(context.Background(), clhAPITimeout*time.Second) 633 defer cancel() 634 635 _, _, err := cl.VmmPingGet(ctx) 636 return err 637 } 638 639 func (clh *cloudHypervisor) getPids() []int { 640 641 var pids []int 642 pids = append(pids, clh.state.PID) 643 644 return pids 645 } 646 647 func (clh *cloudHypervisor) 
addDevice(devInfo interface{}, devType deviceType) error { 648 span, _ := clh.trace("addDevice") 649 defer span.Finish() 650 651 var err error 652 653 switch v := devInfo.(type) { 654 case Endpoint: 655 if err := clh.addNet(v); err != nil { 656 return err 657 } 658 case types.HybridVSock: 659 clh.addVSock(defaultGuestVSockCID, v.UdsPath) 660 case types.Volume: 661 err = clh.addVolume(v) 662 default: 663 clh.Logger().WithField("function", "addDevice").Warnf("Add device of type %v is not supported.", v) 664 return fmt.Errorf("Not implemented support for %s", v) 665 } 666 667 return err 668 } 669 670 //########################################################################### 671 // 672 // Local helper methods related to the hypervisor interface implementation 673 // 674 //########################################################################### 675 676 func (clh *cloudHypervisor) Logger() *log.Entry { 677 return virtLog.WithField("subsystem", "cloudHypervisor") 678 } 679 680 // Adds all capabilities supported by cloudHypervisor implementation of hypervisor interface 681 func (clh *cloudHypervisor) capabilities() types.Capabilities { 682 span, _ := clh.trace("capabilities") 683 defer span.Finish() 684 685 clh.Logger().WithField("function", "capabilities").Info("get Capabilities") 686 var caps types.Capabilities 687 caps.SetFsSharingSupport() 688 caps.SetBlockDeviceHotplugSupport() 689 return caps 690 } 691 692 func (clh *cloudHypervisor) trace(name string) (opentracing.Span, context.Context) { 693 694 if clh.ctx == nil { 695 clh.Logger().WithField("type", "bug").Error("trace called before context set") 696 clh.ctx = context.Background() 697 } 698 699 span, ctx := opentracing.StartSpanFromContext(clh.ctx, name) 700 701 span.SetTag("subsystem", "cloudHypervisor") 702 span.SetTag("type", "clh") 703 704 return span, ctx 705 } 706 707 func (clh *cloudHypervisor) terminate() (err error) { 708 span, _ := clh.trace("terminate") 709 defer span.Finish() 710 711 pid := 
clh.state.PID 712 pidRunning := true 713 if pid == 0 { 714 pidRunning = false 715 } 716 717 clh.Logger().WithField("PID", pid).Info("Stopping Cloud Hypervisor") 718 719 if pidRunning { 720 clhRunning, _ := clh.isClhRunning(clhStopSandboxTimeout) 721 if clhRunning { 722 ctx, cancel := context.WithTimeout(context.Background(), clhStopSandboxTimeout*time.Second) 723 defer cancel() 724 if _, err = clh.client().ShutdownVMM(ctx); err != nil { 725 return err 726 } 727 } 728 } 729 730 // At this point the VMM was stop nicely, but need to check if PID is still running 731 // Wait for the VM process to terminate 732 tInit := time.Now() 733 for { 734 if err = syscall.Kill(pid, syscall.Signal(0)); err != nil { 735 pidRunning = false 736 break 737 } 738 739 if time.Since(tInit).Seconds() >= clhStopSandboxTimeout { 740 pidRunning = true 741 clh.Logger().Warnf("VM still running after waiting %ds", clhStopSandboxTimeout) 742 break 743 } 744 745 // Let's avoid to run a too busy loop 746 time.Sleep(time.Duration(50) * time.Millisecond) 747 } 748 749 // Let's try with a hammer now, a SIGKILL should get rid of the 750 // VM process. 
751 if pidRunning { 752 if err = syscall.Kill(pid, syscall.SIGKILL); err != nil { 753 return fmt.Errorf("Fatal, failed to kill hypervisor process, error: %s", err) 754 } 755 } 756 757 if clh.virtiofsd == nil { 758 return errors.New("virtiofsd config is nil, failed to stop it") 759 } 760 761 if err := clh.cleanupVM(true); err != nil { 762 return err 763 } 764 765 return clh.virtiofsd.Stop() 766 } 767 768 func (clh *cloudHypervisor) reset() { 769 clh.state.reset() 770 } 771 772 func (clh *cloudHypervisor) generateSocket(id string, useVsock bool) (interface{}, error) { 773 if !useVsock { 774 return nil, fmt.Errorf("Can't generate hybrid vsocket for cloud-hypervisor: vsocks is disabled") 775 } 776 777 udsPath, err := clh.vsockSocketPath(id) 778 if err != nil { 779 clh.Logger().Info("Can't generate socket path for cloud-hypervisor") 780 return types.HybridVSock{}, err 781 } 782 783 return types.HybridVSock{ 784 UdsPath: udsPath, 785 Port: uint32(vSockPort), 786 }, nil 787 } 788 789 func (clh *cloudHypervisor) virtioFsSocketPath(id string) (string, error) { 790 return utils.BuildSocketPath(clh.store.RunVMStoragePath(), id, virtioFsSocket) 791 } 792 793 func (clh *cloudHypervisor) vsockSocketPath(id string) (string, error) { 794 return utils.BuildSocketPath(clh.store.RunVMStoragePath(), id, clhSocket) 795 } 796 797 func (clh *cloudHypervisor) apiSocketPath(id string) (string, error) { 798 return utils.BuildSocketPath(clh.store.RunVMStoragePath(), id, clhAPISocket) 799 } 800 801 func (clh *cloudHypervisor) waitVMM(timeout uint) error { 802 clhRunning, err := clh.isClhRunning(timeout) 803 804 if err != nil { 805 return err 806 } 807 808 if !clhRunning { 809 return fmt.Errorf("CLH is not running") 810 } 811 812 return nil 813 } 814 815 func (clh *cloudHypervisor) clhPath() (string, error) { 816 p, err := clh.config.HypervisorAssetPath() 817 if err != nil { 818 return "", err 819 } 820 821 if p == "" { 822 p = defaultClhPath 823 } 824 825 if _, err = os.Stat(p); 
os.IsNotExist(err) { 826 return "", fmt.Errorf("Cloud-Hypervisor path (%s) does not exist", p) 827 } 828 829 return p, nil 830 } 831 832 func (clh *cloudHypervisor) getAvailableVersion() error { 833 834 clhPath, err := clh.clhPath() 835 if err != nil { 836 return err 837 838 } 839 840 cmd := exec.Command(clhPath, "--version") 841 out, err := cmd.CombinedOutput() 842 if err != nil { 843 return err 844 } 845 846 words := strings.Fields(string(out)) 847 if len(words) != 2 { 848 return errors.New("Failed to parse cloud-hypervisor version response. Illegal length") 849 850 } 851 versionSplit := strings.SplitN(words[1], ".", -1) 852 if len(versionSplit) != 3 { 853 return errors.New("Failed to parse cloud-hypervisor version field. Illegal length") 854 855 } 856 857 // Remove 'v' prefix if has one 858 versionSplit[0] = strings.TrimLeft(versionSplit[0], "v") 859 major, err := strconv.ParseUint(versionSplit[0], 10, 64) 860 if err != nil { 861 return err 862 863 } 864 minor, err := strconv.ParseUint(versionSplit[1], 10, 64) 865 if err != nil { 866 return err 867 868 } 869 870 // revision could have aditional commit information separated by '-' 871 revisionSplit := strings.SplitN(versionSplit[2], "-", -1) 872 if len(revisionSplit) < 1 { 873 return errors.Errorf("Failed parse cloud-hypervisor revision %s", versionSplit[2]) 874 } 875 revision, err := strconv.ParseUint(revisionSplit[0], 10, 64) 876 if err != nil { 877 return err 878 } 879 880 clh.version = CloudHypervisorVersion{ 881 Major: int(major), 882 Minor: int(minor), 883 Revision: int(revision), 884 } 885 return nil 886 887 } 888 889 func (clh *cloudHypervisor) LaunchClh() (string, int, error) { 890 891 clhPath, err := clh.clhPath() 892 if err != nil { 893 return "", -1, err 894 } 895 896 args := []string{cscAPIsocket, clh.state.apiSocket} 897 if clh.config.Debug { 898 // Cloud hypervisor log levels 899 // 'v' occurrences increase the level 900 //0 => Error 901 //1 => Warn 902 //2 => Info 903 //3 => Debug 904 //4+ => 
Trace 905 // Use Info, the CI runs with debug enabled 906 // a high level of logging increases the boot time 907 // and in a nested environment this could increase 908 // the chances to fail because agent is not 909 // ready on time. 910 args = append(args, "-vv") 911 } 912 913 clh.Logger().WithField("path", clhPath).Info() 914 clh.Logger().WithField("args", strings.Join(args, " ")).Info() 915 916 cmdHypervisor := exec.Command(clhPath, args...) 917 var hypervisorOutput io.ReadCloser 918 if clh.config.Debug { 919 cmdHypervisor.Env = os.Environ() 920 cmdHypervisor.Env = append(cmdHypervisor.Env, "RUST_BACKTRACE=full") 921 // Get StdoutPipe only for debug, without debug golang will redirect to /dev/null 922 hypervisorOutput, err = cmdHypervisor.StdoutPipe() 923 if err != nil { 924 return "", -1, err 925 } 926 } 927 928 cmdHypervisor.Stderr = cmdHypervisor.Stdout 929 930 err = utils.StartCmd(cmdHypervisor) 931 if err != nil { 932 return "", -1, err 933 } 934 935 if err := clh.waitVMM(clhTimeout); err != nil { 936 clh.Logger().WithField("error", err).Warn("cloud-hypervisor init failed") 937 var output string 938 939 if hypervisorOutput != nil { 940 b, errRead := ioutil.ReadAll(hypervisorOutput) 941 if errRead != nil { 942 output = "failed to read hypervisor output to get error information" 943 } else { 944 output = string(b) 945 } 946 } else { 947 output = "Please enable hypervisor logging to get stdout information" 948 } 949 950 return output, -1, err 951 } 952 953 if clh.config.Debug { 954 cmdLogger := utils.NewProgramLogger("kata-hypervisor") 955 clh.Logger().Debugf("Starting process logger(%s) for hypervisor", cmdLogger) 956 if err := cmdLogger.StartLogger(hypervisorOutput); err != nil { 957 // Not critical to run a container, but output wont be logged 958 clh.Logger().Warnf("Failed start process logger(%s) %s", cmdLogger, err) 959 } 960 } 961 962 return "", cmdHypervisor.Process.Pid, nil 963 } 964 965 
//########################################################################### 966 // 967 // Cloud-hypervisor CLI builder 968 // 969 //########################################################################### 970 971 const ( 972 cctOFF string = "Off" 973 cctNULL string = "Null" 974 cctTTY string = "Tty" 975 ) 976 977 const ( 978 cscAPIsocket string = "--api-socket" 979 ) 980 981 //**************************************** 982 // The kernel command line 983 //**************************************** 984 985 func kernelParamsToString(params []Param) string { 986 987 var paramBuilder strings.Builder 988 for _, p := range params { 989 paramBuilder.WriteString(p.Key) 990 if len(p.Value) > 0 { 991 992 paramBuilder.WriteString("=") 993 paramBuilder.WriteString(p.Value) 994 } 995 paramBuilder.WriteString(" ") 996 } 997 return strings.TrimSpace(paramBuilder.String()) 998 } 999 1000 //**************************************** 1001 // API calls 1002 //**************************************** 1003 func (clh *cloudHypervisor) isClhRunning(timeout uint) (bool, error) { 1004 1005 pid := clh.state.PID 1006 1007 // Check if clh process is running, in case it is not, let's 1008 // return from here. 
1009 if err := syscall.Kill(pid, syscall.Signal(0)); err != nil { 1010 return false, nil 1011 } 1012 1013 timeStart := time.Now() 1014 cl := clh.client() 1015 for { 1016 ctx, cancel := context.WithTimeout(context.Background(), clhAPITimeout*time.Second) 1017 defer cancel() 1018 _, _, err := cl.VmmPingGet(ctx) 1019 if err == nil { 1020 return true, nil 1021 } 1022 1023 if time.Since(timeStart).Seconds() > float64(timeout) { 1024 return false, fmt.Errorf("Failed to connect to API (timeout %ds): %s", timeout, openAPIClientError(err)) 1025 } 1026 1027 time.Sleep(time.Duration(10) * time.Millisecond) 1028 } 1029 1030 } 1031 1032 func (clh *cloudHypervisor) client() clhClient { 1033 if clh.APIClient == nil { 1034 clh.APIClient = clh.newAPIClient() 1035 } 1036 1037 return clh.APIClient 1038 } 1039 1040 func (clh *cloudHypervisor) newAPIClient() *chclient.DefaultApiService { 1041 1042 cfg := chclient.NewConfiguration() 1043 1044 socketTransport := &http.Transport{ 1045 DialContext: func(ctx context.Context, network, path string) (net.Conn, error) { 1046 addr, err := net.ResolveUnixAddr("unix", clh.state.apiSocket) 1047 if err != nil { 1048 return nil, err 1049 1050 } 1051 1052 return net.DialUnix("unix", nil, addr) 1053 }, 1054 } 1055 1056 cfg.HTTPClient = http.DefaultClient 1057 cfg.HTTPClient.Transport = socketTransport 1058 1059 return chclient.NewAPIClient(cfg).DefaultApi 1060 } 1061 1062 func openAPIClientError(err error) error { 1063 1064 if err == nil { 1065 return nil 1066 } 1067 1068 reason := "" 1069 if apierr, ok := err.(chclient.GenericOpenAPIError); ok { 1070 reason = string(apierr.Body()) 1071 } 1072 1073 return fmt.Errorf("error: %v reason: %s", err, reason) 1074 } 1075 1076 func (clh *cloudHypervisor) bootVM(ctx context.Context) error { 1077 1078 cl := clh.client() 1079 1080 if clh.config.Debug { 1081 bodyBuf, err := json.Marshal(clh.vmconfig) 1082 if err != nil { 1083 return err 1084 } 1085 clh.Logger().WithField("body", string(bodyBuf)).Debug("VM config") 
1086 } 1087 _, err := cl.CreateVM(ctx, clh.vmconfig) 1088 1089 if err != nil { 1090 return openAPIClientError(err) 1091 } 1092 1093 info, err := clh.vmInfo() 1094 1095 if err != nil { 1096 return err 1097 } 1098 1099 clh.Logger().Debugf("VM state after create: %#v", info) 1100 1101 if info.State != clhStateCreated { 1102 return fmt.Errorf("VM state is not 'Created' after 'CreateVM'") 1103 } 1104 1105 clh.Logger().Debug("Booting VM") 1106 _, err = cl.BootVM(ctx) 1107 1108 if err != nil { 1109 return openAPIClientError(err) 1110 } 1111 1112 info, err = clh.vmInfo() 1113 1114 if err != nil { 1115 return err 1116 } 1117 1118 clh.Logger().Debugf("VM state after boot: %#v", info) 1119 1120 if info.State != clhStateRunning { 1121 return fmt.Errorf("VM state is not 'Running' after 'BootVM'") 1122 } 1123 1124 return nil 1125 } 1126 1127 func (clh *cloudHypervisor) addVSock(cid int64, path string) { 1128 clh.Logger().WithFields(log.Fields{ 1129 "path": path, 1130 "cid": cid, 1131 }).Info("Adding HybridVSock") 1132 1133 clh.vmconfig.Vsock = chclient.VsockConfig{Cid: cid, Sock: path} 1134 } 1135 1136 func (clh *cloudHypervisor) addNet(e Endpoint) error { 1137 clh.Logger().WithField("endpoint-type", e).Debugf("Adding Endpoint of type %v", e) 1138 1139 mac := e.HardwareAddr() 1140 netPair := e.NetworkPair() 1141 1142 if netPair == nil { 1143 return errors.New("net Pair to be added is nil, needed to get TAP path") 1144 } 1145 1146 tapPath := netPair.TapInterface.TAPIface.Name 1147 1148 if tapPath == "" { 1149 return errors.New("TAP path in network pair is empty") 1150 } 1151 1152 clh.Logger().WithFields(log.Fields{ 1153 "mac": mac, 1154 "tap": tapPath, 1155 }).Info("Adding Net") 1156 1157 clh.vmconfig.Net = append(clh.vmconfig.Net, chclient.NetConfig{Mac: mac, Tap: tapPath}) 1158 return nil 1159 } 1160 1161 // Add shared Volume using virtiofs 1162 func (clh *cloudHypervisor) addVolume(volume types.Volume) error { 1163 if clh.config.SharedFS != config.VirtioFS { 1164 return 
fmt.Errorf("shared fs method not supported %s", clh.config.SharedFS) 1165 } 1166 1167 vfsdSockPath, err := clh.virtioFsSocketPath(clh.id) 1168 if err != nil { 1169 return err 1170 } 1171 1172 if clh.config.VirtioFSCache == virtioFsCacheAlways { 1173 clh.vmconfig.Fs = []chclient.FsConfig{ 1174 { 1175 Tag: volume.MountTag, 1176 CacheSize: int64(clh.config.VirtioFSCacheSize << 20), 1177 Sock: vfsdSockPath, 1178 }, 1179 } 1180 } else { 1181 clh.vmconfig.Fs = []chclient.FsConfig{ 1182 { 1183 Tag: volume.MountTag, 1184 Sock: vfsdSockPath, 1185 }, 1186 } 1187 1188 } 1189 1190 clh.Logger().Debug("Adding share volume to hypervisor: ", volume.MountTag) 1191 return nil 1192 } 1193 1194 // cleanupVM will remove generated files and directories related with the virtual machine 1195 func (clh *cloudHypervisor) cleanupVM(force bool) error { 1196 1197 if clh.id == "" { 1198 return errors.New("Hypervisor ID is empty") 1199 } 1200 1201 clh.Logger().Debug("removing vm sockets") 1202 1203 path, err := clh.vsockSocketPath(clh.id) 1204 if err == nil { 1205 if err := os.Remove(path); err != nil { 1206 clh.Logger().WithField("path", path).Warn("removing vm socket failed") 1207 } 1208 } 1209 1210 // cleanup vm path 1211 dir := filepath.Join(clh.store.RunVMStoragePath(), clh.id) 1212 1213 // If it's a symlink, remove both dir and the target. 
1214 link, err := filepath.EvalSymlinks(dir) 1215 if err != nil { 1216 clh.Logger().WithError(err).WithField("dir", dir).Warn("failed to resolve vm path") 1217 } 1218 1219 clh.Logger().WithFields(log.Fields{ 1220 "link": link, 1221 "dir": dir, 1222 }).Infof("cleanup vm path") 1223 1224 if err := os.RemoveAll(dir); err != nil { 1225 if !force { 1226 return err 1227 } 1228 clh.Logger().WithError(err).Warnf("failed to remove vm path %s", dir) 1229 } 1230 if link != dir && link != "" { 1231 if err := os.RemoveAll(link); err != nil { 1232 if !force { 1233 return err 1234 } 1235 clh.Logger().WithError(err).WithField("link", link).Warn("failed to remove resolved vm path") 1236 } 1237 } 1238 1239 if clh.config.VMid != "" { 1240 dir = filepath.Join(clh.store.RunStoragePath(), clh.config.VMid) 1241 if err := os.RemoveAll(dir); err != nil { 1242 if !force { 1243 return err 1244 } 1245 clh.Logger().WithError(err).WithField("path", dir).Warnf("failed to remove vm path") 1246 } 1247 } 1248 1249 clh.reset() 1250 1251 return nil 1252 } 1253 1254 // vmInfo ask to hypervisor for current VM status 1255 func (clh *cloudHypervisor) vmInfo() (chclient.VmInfo, error) { 1256 cl := clh.client() 1257 ctx, cancelInfo := context.WithTimeout(context.Background(), clhAPITimeout*time.Second) 1258 defer cancelInfo() 1259 1260 info, _, err := cl.VmInfoGet(ctx) 1261 if err != nil { 1262 clh.Logger().WithError(openAPIClientError(err)).Warn("VmInfoGet failed") 1263 } 1264 return info, openAPIClientError(err) 1265 1266 }