github.com/kata-containers/runtime@v0.0.0-20210505125100-04f29832a923/virtcontainers/fc.go (about) 1 // Copyright (c) 2018 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 package virtcontainers 7 8 import ( 9 "bufio" 10 "context" 11 "encoding/json" 12 "fmt" 13 "io" 14 "io/ioutil" 15 "net" 16 "net/http" 17 "os" 18 "os/exec" 19 "path/filepath" 20 "strconv" 21 "strings" 22 "sync" 23 "syscall" 24 "time" 25 26 "github.com/containerd/fifo" 27 httptransport "github.com/go-openapi/runtime/client" 28 "github.com/go-openapi/strfmt" 29 kataclient "github.com/kata-containers/agent/protocols/client" 30 persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" 31 "github.com/kata-containers/runtime/virtcontainers/pkg/firecracker/client" 32 models "github.com/kata-containers/runtime/virtcontainers/pkg/firecracker/client/models" 33 ops "github.com/kata-containers/runtime/virtcontainers/pkg/firecracker/client/operations" 34 "github.com/opencontainers/selinux/go-selinux/label" 35 opentracing "github.com/opentracing/opentracing-go" 36 "github.com/pkg/errors" 37 "github.com/sirupsen/logrus" 38 39 "github.com/blang/semver" 40 "github.com/containerd/console" 41 "github.com/kata-containers/runtime/virtcontainers/device/config" 42 "github.com/kata-containers/runtime/virtcontainers/types" 43 "github.com/kata-containers/runtime/virtcontainers/utils" 44 ) 45 46 type vmmState uint8 47 48 const ( 49 notReady vmmState = iota 50 cfReady 51 vmReady 52 ) 53 54 const ( 55 //fcTimeout is the maximum amount of time in seconds to wait for the VMM to respond 56 fcTimeout = 10 57 fcSocket = "firecracker.socket" 58 //Name of the files within jailer root 59 //Having predefined names helps with cleanup 60 fcKernel = "vmlinux" 61 fcInitrd = "initrd" 62 fcRootfs = "rootfs" 63 fcStopSandboxTimeout = 15 64 // This indicates the number of block devices that can be attached to the 65 // firecracker guest VM. 66 // We attach a pool of placeholder drives before the guest has started, and then 67 // patch the replace placeholder drives with drives with actual contents. 68 fcDiskPoolSize = 8 69 defaultHybridVSocketName = "kata.hvsock" 70 71 // This is the first usable vsock context ID. All the vsocks can use the same 72 // ID, since it's only used in the guest. 73 defaultGuestVSockCID = int64(0x3) 74 75 // This is related to firecracker logging scheme 76 fcLogFifo = "logs.fifo" 77 fcMetricsFifo = "metrics.fifo" 78 79 defaultFcConfig = "fcConfig.json" 80 // storagePathSuffix mirrors persist/fs/fs.go:storagePathSuffix 81 storagePathSuffix = "vc" 82 ) 83 84 // Specify the minimum version of firecracker supported 85 var fcMinSupportedVersion = semver.MustParse("0.21.1") 86 87 var fcKernelParams = []Param{ 88 // The boot source is the first partition of the first block device added 89 {"pci", "off"}, 90 {"reboot", "k"}, 91 {"panic", "1"}, 92 {"iommu", "off"}, 93 {"net.ifnames", "0"}, 94 {"random.trust_cpu", "on"}, 95 96 // Firecracker doesn't support ACPI 97 // Fix kernel error "ACPI BIOS Error (bug)" 98 {"acpi", "off"}, 99 } 100 101 func (s vmmState) String() string { 102 switch s { 103 case notReady: 104 return "FC not ready" 105 case cfReady: 106 return "FC configure ready" 107 case vmReady: 108 return "FC VM ready" 109 } 110 111 return "" 112 } 113 114 // FirecrackerInfo contains information related to the hypervisor that we 115 // want to store on disk 116 type FirecrackerInfo struct { 117 PID int 118 Version string 119 } 120 121 type firecrackerState struct { 122 sync.RWMutex 123 state vmmState 124 } 125 126 func (s *firecrackerState) set(state vmmState) { 127 s.Lock() 128 defer s.Unlock() 129 130 s.state = state 131 } 132 133 // firecracker is an Hypervisor interface implementation for the firecracker VMM. 134 type firecracker struct { 135 id string //Unique ID per pod. Maps to the truncated sandbox id 136 vmPath string //All jailed VM assets need to be under this 137 chrootBaseDir string //chroot base for the jailer 138 jailerRoot string 139 socketPath string 140 netNSPath string 141 uid string //UID and GID to be used for the VMM 142 gid string 143 144 info FirecrackerInfo 145 146 firecrackerd *exec.Cmd //Tracks the firecracker process itself 147 connection *client.Firecracker //Tracks the current active connection 148 149 ctx context.Context 150 config HypervisorConfig 151 pendingDevices []firecrackerDevice // Devices to be added before the FC VM ready 152 153 state firecrackerState 154 jailed bool //Set to true if jailer is enabled 155 stateful bool //Set to true if running with shimv2 156 157 fcConfigPath string 158 fcConfig *types.FcConfig // Parameters configured before VM starts 159 160 hotplugDriveOffset int 161 } 162 163 type firecrackerDevice struct { 164 dev interface{} 165 devType deviceType 166 } 167 168 // Logger returns a logrus logger appropriate for logging firecracker messages 169 func (fc *firecracker) Logger() *logrus.Entry { 170 return virtLog.WithField("subsystem", "firecracker") 171 } 172 173 func (fc *firecracker) trace(name string) (opentracing.Span, context.Context) { 174 if fc.ctx == nil { 175 fc.Logger().WithField("type", "bug").Error("trace called before context set") 176 fc.ctx = context.Background() 177 } 178 179 span, ctx := opentracing.StartSpanFromContext(fc.ctx, name) 180 181 span.SetTag("subsystem", "hypervisor") 182 span.SetTag("type", "firecracker") 183 184 return span, ctx 185 } 186 187 //At some cases, when sandbox id is too long, it will incur error of overlong 188 //firecracker API unix socket(fc.socketPath). 189 //In Linux, sun_path could maximumly contains 108 bytes in size. 190 //(http://man7.org/linux/man-pages/man7/unix.7.html) 191 func (fc *firecracker) truncateID(id string) string { 192 if len(id) > 32 { 193 //truncate the id to only leave the size of UUID(128bit). 194 return id[:32] 195 } 196 197 return id 198 } 199 200 // For firecracker this call only sets the internal structure up. 201 // The sandbox will be created and started through startSandbox(). 202 func (fc *firecracker) createSandbox(ctx context.Context, id string, networkNS NetworkNamespace, hypervisorConfig *HypervisorConfig, stateful bool) error { 203 fc.ctx = ctx 204 205 span, _ := fc.trace("createSandbox") 206 defer span.Finish() 207 208 //TODO: check validity of the hypervisor config provided 209 //https://github.com/kata-containers/runtime/issues/1065 210 fc.id = fc.truncateID(id) 211 fc.state.set(notReady) 212 fc.config = *hypervisorConfig 213 fc.stateful = stateful 214 215 // When running with jailer all resources need to be under 216 // a specific location and that location needs to have 217 // exec permission (i.e. should not be mounted noexec, e.g. /run, /var/run) 218 // Also unix domain socket names have a hard limit 219 // #define UNIX_PATH_MAX 108 220 // Keep it short and live within the jailer expected paths 221 // <chroot_base>/<exec_file_name>/<id>/ 222 // Also jailer based on the id implicitly sets up cgroups under 223 // <cgroups_base>/<exec_file_name>/<id>/ 224 hypervisorName := filepath.Base(hypervisorConfig.HypervisorPath) 225 //fs.RunStoragePath cannot be used as we need exec perms 226 fc.chrootBaseDir = filepath.Join("/run", storagePathSuffix) 227 228 fc.vmPath = filepath.Join(fc.chrootBaseDir, hypervisorName, fc.id) 229 fc.jailerRoot = filepath.Join(fc.vmPath, "root") // auto created by jailer 230 231 // Firecracker and jailer automatically creates default API socket under /run 232 // with the name of "firecracker.socket" 233 fc.socketPath = filepath.Join(fc.jailerRoot, "run", fcSocket) 234 235 // So we need to repopulate this at startSandbox where it is valid 236 fc.netNSPath = networkNS.NetNsPath 237 238 // Till we create lower privileged kata user run as root 239 // https://github.com/kata-containers/runtime/issues/1869 240 fc.uid = "0" 241 fc.gid = "0" 242 243 fc.fcConfig = &types.FcConfig{} 244 fc.fcConfigPath = filepath.Join(fc.vmPath, defaultFcConfig) 245 return nil 246 } 247 248 func (fc *firecracker) newFireClient() *client.Firecracker { 249 span, _ := fc.trace("newFireClient") 250 defer span.Finish() 251 httpClient := client.NewHTTPClient(strfmt.NewFormats()) 252 253 socketTransport := &http.Transport{ 254 DialContext: func(ctx context.Context, network, path string) (net.Conn, error) { 255 addr, err := net.ResolveUnixAddr("unix", fc.socketPath) 256 if err != nil { 257 return nil, err 258 } 259 260 return net.DialUnix("unix", nil, addr) 261 }, 262 } 263 264 transport := httptransport.New(client.DefaultHost, client.DefaultBasePath, client.DefaultSchemes) 265 transport.SetLogger(fc.Logger()) 266 transport.SetDebug(fc.Logger().Logger.Level == logrus.DebugLevel) 267 transport.Transport = socketTransport 268 httpClient.SetTransport(transport) 269 270 return httpClient 271 } 272 273 func (fc *firecracker) vmRunning() bool { 274 resp, err := fc.client().Operations.DescribeInstance(nil) 275 if err != nil { 276 fc.Logger().WithError(err).Error("getting vm status failed") 277 return false 278 } 279 280 // Be explicit 281 switch *resp.Payload.State { 282 case models.InstanceInfoStateStarting: 283 // Unsure what we should do here 284 fc.Logger().WithField("unexpected-state", models.InstanceInfoStateStarting).Debug("vmRunning") 285 return false 286 case models.InstanceInfoStateRunning: 287 return true 288 case models.InstanceInfoStateUninitialized: 289 return false 290 default: 291 return false 292 } 293 } 294 295 func (fc *firecracker) getVersionNumber() (string, error) { 296 args := []string{"--version"} 297 checkCMD := exec.Command(fc.config.HypervisorPath, args...) 298 299 data, err := checkCMD.Output() 300 if err != nil { 301 return "", fmt.Errorf("Running checking FC version command failed: %v", err) 302 } 303 304 var version string 305 fields := strings.Split(string(data), " ") 306 if len(fields) > 1 { 307 // The output format of `Firecracker --verion` is as follows 308 // Firecracker v0.21.1 309 version = strings.TrimPrefix(strings.TrimSpace(fields[1]), "v") 310 return version, nil 311 } 312 313 return "", errors.New("getting FC version failed, the output is malformed") 314 } 315 316 func (fc *firecracker) checkVersion(version string) error { 317 v, err := semver.Make(version) 318 if err != nil { 319 return fmt.Errorf("Malformed firecracker version: %v", err) 320 } 321 322 if v.LT(fcMinSupportedVersion) { 323 return fmt.Errorf("version %v is not supported. Minimum supported version of firecracker is %v", v.String(), fcMinSupportedVersion.String()) 324 } 325 326 return nil 327 } 328 329 // waitVMMRunning will wait for timeout seconds for the VMM to be up and running. 330 func (fc *firecracker) waitVMMRunning(timeout int) error { 331 span, _ := fc.trace("wait VMM to be running") 332 defer span.Finish() 333 334 if timeout < 0 { 335 return fmt.Errorf("Invalid timeout %ds", timeout) 336 } 337 338 timeStart := time.Now() 339 for { 340 if fc.vmRunning() { 341 return nil 342 } 343 344 if int(time.Since(timeStart).Seconds()) > timeout { 345 return fmt.Errorf("Failed to connect to firecrackerinstance (timeout %ds)", timeout) 346 } 347 348 time.Sleep(time.Duration(10) * time.Millisecond) 349 } 350 } 351 352 func (fc *firecracker) fcInit(timeout int) error { 353 span, _ := fc.trace("fcInit") 354 defer span.Finish() 355 356 var err error 357 //FC version set and check 358 if fc.info.Version, err = fc.getVersionNumber(); err != nil { 359 return err 360 } 361 362 if err := fc.checkVersion(fc.info.Version); err != nil { 363 return err 364 } 365 366 var cmd *exec.Cmd 367 var args []string 368 369 if fc.fcConfigPath, err = fc.fcJailResource(fc.fcConfigPath, defaultFcConfig); err != nil { 370 return err 371 } 372 373 if !fc.config.Debug && fc.stateful { 374 args = append(args, "--daemonize") 375 } 376 377 //https://github.com/firecracker-microvm/firecracker/blob/master/docs/jailer.md#jailer-usage 378 //--seccomp-level specifies whether seccomp filters should be installed and how restrictive they should be. Possible values are: 379 //0 : disabled. 380 //1 : basic filtering. This prohibits syscalls not whitelisted by Firecracker. 381 //2 (default): advanced filtering. This adds further checks on some of the parameters of the allowed syscalls. 382 if fc.jailed { 383 jailedArgs := []string{ 384 "--id", fc.id, 385 "--node", "0", //FIXME: Comprehend NUMA topology or explicit ignore 386 "--exec-file", fc.config.HypervisorPath, 387 "--uid", "0", //https://github.com/kata-containers/runtime/issues/1869 388 "--gid", "0", 389 "--chroot-base-dir", fc.chrootBaseDir, 390 } 391 args = append(args, jailedArgs...) 392 if fc.netNSPath != "" { 393 args = append(args, "--netns", fc.netNSPath) 394 } 395 args = append(args, "--", "--config-file", fc.fcConfigPath) 396 397 cmd = exec.Command(fc.config.JailerPath, args...) 398 } else { 399 args = append(args, 400 "--api-sock", fc.socketPath, 401 "--config-file", fc.fcConfigPath) 402 cmd = exec.Command(fc.config.HypervisorPath, args...) 403 } 404 405 if fc.config.Debug && fc.stateful { 406 stdin, err := fc.watchConsole() 407 if err != nil { 408 return err 409 } 410 411 cmd.Stderr = stdin 412 cmd.Stdout = stdin 413 } 414 415 fc.Logger().WithField("hypervisor args", args).Debug() 416 fc.Logger().WithField("hypervisor cmd", cmd).Debug() 417 418 fc.Logger().Info("Starting VM") 419 if err := cmd.Start(); err != nil { 420 fc.Logger().WithField("Error starting firecracker", err).Debug() 421 return err 422 } 423 424 fc.info.PID = cmd.Process.Pid 425 fc.firecrackerd = cmd 426 fc.connection = fc.newFireClient() 427 428 if err := fc.waitVMMRunning(timeout); err != nil { 429 fc.Logger().WithField("fcInit failed:", err).Debug() 430 return err 431 } 432 return nil 433 } 434 435 func (fc *firecracker) fcEnd() (err error) { 436 span, _ := fc.trace("fcEnd") 437 defer span.Finish() 438 439 fc.Logger().Info("Stopping firecracker VM") 440 441 defer func() { 442 if err != nil { 443 fc.Logger().Info("fcEnd failed") 444 } else { 445 fc.Logger().Info("Firecracker VM stopped") 446 } 447 }() 448 449 pid := fc.info.PID 450 451 // Send a SIGTERM to the VM process to try to stop it properly 452 if err = syscall.Kill(pid, syscall.SIGTERM); err != nil { 453 if err == syscall.ESRCH { 454 return nil 455 } 456 return err 457 } 458 459 // Wait for the VM process to terminate 460 tInit := time.Now() 461 for { 462 if err = syscall.Kill(pid, syscall.Signal(0)); err != nil { 463 return nil 464 } 465 466 if time.Since(tInit).Seconds() >= fcStopSandboxTimeout { 467 fc.Logger().Warnf("VM still running after waiting %ds", fcStopSandboxTimeout) 468 break 469 } 470 471 // Let's avoid to run a too busy loop 472 time.Sleep(time.Duration(50) * time.Millisecond) 473 } 474 475 // Let's try with a hammer now, a SIGKILL should get rid of the 476 // VM process. 477 return syscall.Kill(pid, syscall.SIGKILL) 478 } 479 480 func (fc *firecracker) client() *client.Firecracker { 481 span, _ := fc.trace("client") 482 defer span.Finish() 483 484 if fc.connection == nil { 485 fc.connection = fc.newFireClient() 486 } 487 488 return fc.connection 489 } 490 491 func (fc *firecracker) createJailedDrive(name string) (string, error) { 492 // Don't bind mount the resource, just create a raw file 493 // that can be bind-mounted later 494 r := filepath.Join(fc.jailerRoot, name) 495 f, err := os.Create(r) 496 if err != nil { 497 return "", err 498 } 499 f.Close() 500 501 if fc.jailed { 502 // use path relative to the jail 503 r = filepath.Join("/", name) 504 } 505 506 return r, nil 507 } 508 509 // when running with jailer, firecracker binary will firstly be copied into fc.jailerRoot, 510 // and then being executed there. Therefore we need to ensure fc.JailerRoot has exec permissions. 511 func (fc *firecracker) fcRemountJailerRootWithExec() error { 512 if err := bindMount(context.Background(), fc.jailerRoot, fc.jailerRoot, false, "shared"); err != nil { 513 fc.Logger().WithField("JailerRoot", fc.jailerRoot).Errorf("bindMount failed: %v", err) 514 return err 515 } 516 517 // /run is normally mounted with rw, nosuid(MS_NOSUID), relatime(MS_RELATIME), noexec(MS_NOEXEC). 518 // we re-mount jailerRoot to deliberately leave out MS_NOEXEC. 519 if err := remount(context.Background(), syscall.MS_NOSUID|syscall.MS_RELATIME, fc.jailerRoot); err != nil { 520 fc.Logger().WithField("JailerRoot", fc.jailerRoot).Errorf("Re-mount failed: %v", err) 521 return err 522 } 523 524 return nil 525 } 526 527 func (fc *firecracker) fcJailResource(src, dst string) (string, error) { 528 if src == "" || dst == "" { 529 return "", fmt.Errorf("fcJailResource: invalid jail locations: src:%v, dst:%v", 530 src, dst) 531 } 532 jailedLocation := filepath.Join(fc.jailerRoot, dst) 533 if err := bindMount(context.Background(), src, jailedLocation, false, "slave"); err != nil { 534 fc.Logger().WithField("bindMount failed", err).Error() 535 return "", err 536 } 537 538 if !fc.jailed { 539 return jailedLocation, nil 540 } 541 542 // This is the path within the jailed root 543 absPath := filepath.Join("/", dst) 544 return absPath, nil 545 } 546 547 func (fc *firecracker) fcSetBootSource(kernelPath, initrdPath, params string) error { 548 span, _ := fc.trace("fcSetBootSource") 549 defer span.Finish() 550 fc.Logger().WithFields(logrus.Fields{ 551 "kernel-path": kernelPath, 552 "initrd-path": initrdPath, 553 "kernel-params": params, 554 }).Debug("fcSetBootSource") 555 556 kernelPath, err := fc.fcJailResource(kernelPath, fcKernel) 557 if err != nil { 558 return err 559 } 560 561 if initrdPath != "" { 562 initrdPath, err = fc.fcJailResource(initrdPath, fcInitrd) 563 if err != nil { 564 return err 565 } 566 } 567 568 src := &models.BootSource{ 569 KernelImagePath: &kernelPath, 570 BootArgs: params, 571 InitrdPath: initrdPath, 572 } 573 574 fc.fcConfig.BootSource = src 575 576 return nil 577 } 578 579 func (fc *firecracker) fcSetVMRootfs(path string) error { 580 span, _ := fc.trace("fcSetVMRootfs") 581 defer span.Finish() 582 583 jailedRootfs, err := fc.fcJailResource(path, fcRootfs) 584 if err != nil { 585 return err 586 } 587 588 driveID := "rootfs" 589 isReadOnly := true 590 //Add it as a regular block device 591 //This allows us to use a partitoned root block device 592 isRootDevice := false 593 // This is the path within the jailed root 594 drive := &models.Drive{ 595 DriveID: &driveID, 596 IsReadOnly: &isReadOnly, 597 IsRootDevice: &isRootDevice, 598 PathOnHost: &jailedRootfs, 599 } 600 601 fc.fcConfig.Drives = append(fc.fcConfig.Drives, drive) 602 603 return nil 604 } 605 606 func (fc *firecracker) fcSetVMBaseConfig(mem int64, vcpus int64, htEnabled bool) { 607 span, _ := fc.trace("fcSetVMBaseConfig") 608 defer span.Finish() 609 fc.Logger().WithFields(logrus.Fields{"mem": mem, 610 "vcpus": vcpus, 611 "htEnabled": htEnabled}).Debug("fcSetVMBaseConfig") 612 613 cfg := &models.MachineConfiguration{ 614 HtEnabled: &htEnabled, 615 MemSizeMib: &mem, 616 VcpuCount: &vcpus, 617 } 618 619 fc.fcConfig.MachineConfig = cfg 620 } 621 622 func (fc *firecracker) fcSetLogger() error { 623 span, _ := fc.trace("fcSetLogger") 624 defer span.Finish() 625 626 fcLogLevel := "Error" 627 if fc.config.Debug { 628 fcLogLevel = "Debug" 629 } 630 631 // listen to log fifo file and transfer error info 632 jailedLogFifo, err := fc.fcListenToFifo(fcLogFifo) 633 if err != nil { 634 return fmt.Errorf("Failed setting log: %s", err) 635 } 636 637 // listen to metrics file and transfer error info 638 jailedMetricsFifo, err := fc.fcListenToFifo(fcMetricsFifo) 639 if err != nil { 640 return fmt.Errorf("Failed setting log: %s", err) 641 } 642 643 fc.fcConfig.Logger = &models.Logger{ 644 Level: &fcLogLevel, 645 LogFifo: &jailedLogFifo, 646 MetricsFifo: &jailedMetricsFifo, 647 } 648 649 return err 650 } 651 652 func (fc *firecracker) fcListenToFifo(fifoName string) (string, error) { 653 fcFifoPath := filepath.Join(fc.vmPath, fifoName) 654 fcFifo, err := fifo.OpenFifo(context.Background(), fcFifoPath, syscall.O_CREAT|syscall.O_RDONLY|syscall.O_NONBLOCK, 0) 655 if err != nil { 656 return "", fmt.Errorf("Failed to open/create fifo file %s", err) 657 } 658 659 jailedFifoPath, err := fc.fcJailResource(fcFifoPath, fifoName) 660 if err != nil { 661 return "", err 662 } 663 664 go func() { 665 scanner := bufio.NewScanner(fcFifo) 666 for scanner.Scan() { 667 fc.Logger().WithFields(logrus.Fields{ 668 "fifoName": fifoName, 669 "contents": scanner.Text()}).Error("firecracker failed") 670 } 671 672 if err := scanner.Err(); err != nil { 673 fc.Logger().WithError(err).Errorf("Failed reading firecracker fifo file") 674 } 675 676 if err := fcFifo.Close(); err != nil { 677 fc.Logger().WithError(err).Errorf("Failed closing firecracker fifo file") 678 } 679 }() 680 681 return jailedFifoPath, nil 682 } 683 684 func (fc *firecracker) fcInitConfiguration() error { 685 // Firecracker API socket(firecracker.socket) is automatically created 686 // under /run dir. 687 err := os.MkdirAll(filepath.Join(fc.jailerRoot, "run"), DirMode) 688 if err != nil { 689 return err 690 } 691 defer func() { 692 if err != nil { 693 if err := os.RemoveAll(fc.vmPath); err != nil { 694 fc.Logger().WithError(err).Error("Fail to clean up vm directory") 695 } 696 } 697 }() 698 699 if fc.config.JailerPath != "" { 700 fc.jailed = true 701 if err := fc.fcRemountJailerRootWithExec(); err != nil { 702 return err 703 } 704 } 705 706 fc.fcSetVMBaseConfig(int64(fc.config.MemorySize), 707 int64(fc.config.NumVCPUs), false) 708 709 kernelPath, err := fc.config.KernelAssetPath() 710 if err != nil { 711 return err 712 } 713 714 if fc.config.Debug && fc.stateful { 715 fcKernelParams = append(fcKernelParams, Param{"console", "ttyS0"}) 716 } else { 717 fcKernelParams = append(fcKernelParams, []Param{ 718 {"8250.nr_uarts", "0"}, 719 // Tell agent where to send the logs 720 {"agent.log_vport", fmt.Sprintf("%d", vSockLogsPort)}, 721 }...) 722 } 723 724 initrdPath, err := fc.config.InitrdAssetPath() 725 if err != nil { 726 return err 727 } 728 729 kernelParams := append(fc.config.KernelParams, fcKernelParams...) 730 if initrdPath == "" { 731 kernelParams = append(kernelParams, commonVirtioblkKernelRootParams...) 732 } 733 strParams := SerializeParams(kernelParams, "=") 734 formattedParams := strings.Join(strParams, " ") 735 if err := fc.fcSetBootSource(kernelPath, initrdPath, formattedParams); err != nil { 736 return err 737 } 738 739 if initrdPath == "" { 740 image, err := fc.config.ImageAssetPath() 741 if err != nil { 742 return err 743 } 744 745 if err := fc.fcSetVMRootfs(image); err != nil { 746 return err 747 } 748 } 749 750 fc.hotplugDriveOffset = len(fc.fcConfig.Drives) 751 752 if err := fc.createDiskPool(); err != nil { 753 return err 754 } 755 756 if err := fc.fcSetLogger(); err != nil { 757 return err 758 } 759 760 fc.state.set(cfReady) 761 for _, d := range fc.pendingDevices { 762 if err := fc.addDevice(d.dev, d.devType); err != nil { 763 return err 764 } 765 } 766 767 return nil 768 } 769 770 // startSandbox will start the hypervisor for the given sandbox. 771 // In the context of firecracker, this will start the hypervisor, 772 // for configuration, but not yet start the actual virtual machine 773 func (fc *firecracker) startSandbox(timeout int) error { 774 span, _ := fc.trace("startSandbox") 775 defer span.Finish() 776 777 if err := fc.fcInitConfiguration(); err != nil { 778 return err 779 } 780 781 data, errJSON := json.MarshalIndent(fc.fcConfig, "", "\t") 782 if errJSON != nil { 783 return errJSON 784 } 785 786 if err := ioutil.WriteFile(fc.fcConfigPath, data, 0640); err != nil { 787 return err 788 } 789 790 var err error 791 defer func() { 792 if err != nil { 793 fc.fcEnd() 794 } 795 }() 796 797 // This needs to be done as late as possible, since all processes that 798 // are executed by kata-runtime after this call, run with the SELinux 799 // label. If these processes require privileged, we do not want to run 800 // them under confinement. 801 if err := label.SetProcessLabel(fc.config.SELinuxProcessLabel); err != nil { 802 return err 803 } 804 defer label.SetProcessLabel("") 805 806 err = fc.fcInit(fcTimeout) 807 if err != nil { 808 return err 809 } 810 811 // make sure 'others' don't have access to this socket 812 err = os.Chmod(filepath.Join(fc.jailerRoot, defaultHybridVSocketName), 0640) 813 if err != nil { 814 return fmt.Errorf("Could not change socket permissions: %v", err) 815 } 816 817 fc.state.set(vmReady) 818 return nil 819 } 820 821 func fcDriveIndexToID(i int) string { 822 return "drive_" + strconv.Itoa(i) 823 } 824 825 // Creates a disk pool to attach container virtio-block devices with 826 // fcUpdateBlockDrive 827 func (fc *firecracker) createDiskPool() error { 828 span, _ := fc.trace("createDiskPool") 829 defer span.Finish() 830 831 for i := 0; i < fcDiskPoolSize; i++ { 832 driveID := fcDriveIndexToID(i) 833 isReadOnly := false 834 isRootDevice := false 835 836 // Create a temporary file as a placeholder backend for the drive 837 jailedDrive, err := fc.createJailedDrive(driveID) 838 if err != nil { 839 return err 840 } 841 842 drive := &models.Drive{ 843 DriveID: &driveID, 844 IsReadOnly: &isReadOnly, 845 IsRootDevice: &isRootDevice, 846 PathOnHost: &jailedDrive, 847 } 848 849 fc.fcConfig.Drives = append(fc.fcConfig.Drives, drive) 850 } 851 852 return nil 853 } 854 855 func (fc *firecracker) umountResource(jailedPath string) { 856 hostPath := filepath.Join(fc.jailerRoot, jailedPath) 857 fc.Logger().WithField("resource", hostPath).Debug("Unmounting resource") 858 err := syscall.Unmount(hostPath, syscall.MNT_DETACH) 859 if err != nil { 860 fc.Logger().WithError(err).Error("Failed to umount resource") 861 } 862 } 863 864 // cleanup all jail artifacts 865 func (fc *firecracker) cleanupJail() { 866 span, _ := fc.trace("cleanupJail") 867 defer span.Finish() 868 869 fc.umountResource(fcKernel) 870 fc.umountResource(fcInitrd) 871 fc.umountResource(fcRootfs) 872 fc.umountResource(fcLogFifo) 873 fc.umountResource(fcMetricsFifo) 874 fc.umountResource(defaultFcConfig) 875 // if running with jailer, we also need to umount fc.jailerRoot 876 if fc.config.JailerPath != "" { 877 if err := syscall.Unmount(fc.jailerRoot, syscall.MNT_DETACH); err != nil { 878 fc.Logger().WithField("JailerRoot", fc.jailerRoot).WithError(err).Error("Failed to umount") 879 } 880 } 881 882 fc.Logger().WithField("cleaningJail", fc.vmPath).Info() 883 if err := os.RemoveAll(fc.vmPath); err != nil { 884 fc.Logger().WithField("cleanupJail failed", err).Error() 885 } 886 } 887 888 // stopSandbox will stop the Sandbox's VM. 889 func (fc *firecracker) stopSandbox() (err error) { 890 span, _ := fc.trace("stopSandbox") 891 defer span.Finish() 892 893 return fc.fcEnd() 894 } 895 896 func (fc *firecracker) pauseSandbox() error { 897 return nil 898 } 899 900 func (fc *firecracker) saveSandbox() error { 901 return nil 902 } 903 904 func (fc *firecracker) resumeSandbox() error { 905 return nil 906 } 907 908 func (fc *firecracker) fcAddVsock(hvs types.HybridVSock) { 909 span, _ := fc.trace("fcAddVsock") 910 defer span.Finish() 911 912 udsPath := hvs.UdsPath 913 if fc.jailed { 914 udsPath = filepath.Join("/", defaultHybridVSocketName) 915 } 916 917 vsockID := "root" 918 ctxID := defaultGuestVSockCID 919 vsock := &models.Vsock{ 920 GuestCid: &ctxID, 921 UdsPath: &udsPath, 922 VsockID: &vsockID, 923 } 924 925 fc.fcConfig.Vsock = vsock 926 } 927 928 func (fc *firecracker) fcAddNetDevice(endpoint Endpoint) { 929 span, _ := fc.trace("fcAddNetDevice") 930 defer span.Finish() 931 932 ifaceID := endpoint.Name() 933 ifaceCfg := &models.NetworkInterface{ 934 AllowMmdsRequests: false, 935 GuestMac: endpoint.HardwareAddr(), 936 IfaceID: &ifaceID, 937 HostDevName: &endpoint.NetworkPair().TapInterface.TAPIface.Name, 938 } 939 940 fc.fcConfig.NetworkInterfaces = append(fc.fcConfig.NetworkInterfaces, ifaceCfg) 941 } 942 943 func (fc *firecracker) fcAddBlockDrive(drive config.BlockDrive) error { 944 span, _ := fc.trace("fcAddBlockDrive") 945 defer span.Finish() 946 947 driveID := drive.ID 948 isReadOnly := false 949 isRootDevice := false 950 951 jailedDrive, err := fc.fcJailResource(drive.File, driveID) 952 if err != nil { 953 fc.Logger().WithField("fcAddBlockDrive failed", err).Error() 954 return err 955 } 956 driveFc := &models.Drive{ 957 DriveID: &driveID, 958 IsReadOnly: &isReadOnly, 959 IsRootDevice: &isRootDevice, 960 PathOnHost: &jailedDrive, 961 } 962 963 fc.fcConfig.Drives = append(fc.fcConfig.Drives, driveFc) 964 965 return nil 966 } 967 968 // Firecracker supports replacing the host drive used once the VM has booted up 969 func (fc *firecracker) fcUpdateBlockDrive(path, id string) error { 970 span, _ := fc.trace("fcUpdateBlockDrive") 971 defer span.Finish() 972 973 // Use the global block index as an index into the pool of the devices 974 // created for firecracker. 975 driveParams := ops.NewPatchGuestDriveByIDParams() 976 driveParams.SetDriveID(id) 977 978 driveFc := &models.PartialDrive{ 979 DriveID: &id, 980 PathOnHost: &path, //This is the only property that can be modified 981 } 982 983 driveParams.SetBody(driveFc) 984 if _, err := fc.client().Operations.PatchGuestDriveByID(driveParams); err != nil { 985 return err 986 } 987 988 return nil 989 } 990 991 // addDevice will add extra devices to firecracker. Limited to configure before the 992 // virtual machine starts. Devices include drivers and network interfaces only. 993 func (fc *firecracker) addDevice(devInfo interface{}, devType deviceType) error { 994 span, _ := fc.trace("addDevice") 995 defer span.Finish() 996 997 fc.state.RLock() 998 defer fc.state.RUnlock() 999 1000 if fc.state.state == notReady { 1001 dev := firecrackerDevice{ 1002 dev: devInfo, 1003 devType: devType, 1004 } 1005 fc.Logger().Info("FC not ready, queueing device") 1006 fc.pendingDevices = append(fc.pendingDevices, dev) 1007 return nil 1008 } 1009 1010 var err error 1011 switch v := devInfo.(type) { 1012 case Endpoint: 1013 fc.Logger().WithField("device-type-endpoint", devInfo).Info("Adding device") 1014 fc.fcAddNetDevice(v) 1015 case config.BlockDrive: 1016 fc.Logger().WithField("device-type-blockdrive", devInfo).Info("Adding device") 1017 err = fc.fcAddBlockDrive(v) 1018 case types.HybridVSock: 1019 fc.Logger().WithField("device-type-hybrid-vsock", devInfo).Info("Adding device") 1020 fc.fcAddVsock(v) 1021 default: 1022 fc.Logger().WithField("unknown-device-type", devInfo).Error("Adding device") 1023 } 1024 1025 return err 1026 } 1027 1028 // hotplugBlockDevice supported in Firecracker VMM 1029 // hot add or remove a block device. 1030 func (fc *firecracker) hotplugBlockDevice(drive config.BlockDrive, op operation) (interface{}, error) { 1031 var path string 1032 var err error 1033 driveID := fcDriveIndexToID(drive.Index) 1034 1035 if op == addDevice { 1036 //The drive placeholder has to exist prior to Update 1037 path, err = fc.fcJailResource(drive.File, driveID) 1038 if err != nil { 1039 fc.Logger().WithError(err).WithField("resource", drive.File).Error("Could not jail resource") 1040 return nil, err 1041 } 1042 } else { 1043 // umount the disk, it's no longer needed. 1044 fc.umountResource(driveID) 1045 // use previous raw file created at createDiskPool, that way 1046 // the resource is released by firecracker and it can be destroyed in the host 1047 path = filepath.Join(fc.jailerRoot, driveID) 1048 } 1049 1050 return nil, fc.fcUpdateBlockDrive(path, driveID) 1051 } 1052 1053 // hotplugAddDevice supported in Firecracker VMM 1054 func (fc *firecracker) hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error) { 1055 span, _ := fc.trace("hotplugAddDevice") 1056 defer span.Finish() 1057 1058 switch devType { 1059 case blockDev: 1060 return fc.hotplugBlockDevice(*devInfo.(*config.BlockDrive), addDevice) 1061 default: 1062 fc.Logger().WithFields(logrus.Fields{"devInfo": devInfo, 1063 "deviceType": devType}).Warn("hotplugAddDevice: unsupported device") 1064 return nil, fmt.Errorf("Could not hot add device: unsupported device: %v, type: %v", 1065 devInfo, devType) 1066 } 1067 } 1068 1069 // hotplugRemoveDevice supported in Firecracker VMM 1070 func (fc *firecracker) hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error) { 1071 span, _ := fc.trace("hotplugRemoveDevice") 1072 defer span.Finish() 1073 1074 switch devType { 1075 case blockDev: 1076 return fc.hotplugBlockDevice(*devInfo.(*config.BlockDrive), removeDevice) 1077 default: 1078 fc.Logger().WithFields(logrus.Fields{"devInfo": devInfo, 1079 "deviceType": devType}).Error("hotplugRemoveDevice: unsupported device") 1080 return nil, fmt.Errorf("Could not hot remove device: unsupported device: %v, type: %v", 1081 devInfo, devType) 1082 } 1083 } 1084 1085 // getSandboxConsole builds the path of the console where we can read 1086 // logs coming from the sandbox. 1087 func (fc *firecracker) getSandboxConsole(id string) (string, error) { 1088 return fmt.Sprintf("%s://%s:%d", kataclient.HybridVSockScheme, filepath.Join(fc.jailerRoot, defaultHybridVSocketName), vSockLogsPort), nil 1089 } 1090 1091 func (fc *firecracker) disconnect() { 1092 fc.state.set(notReady) 1093 } 1094 1095 // Adds all capabilities supported by firecracker implementation of hypervisor interface 1096 func (fc *firecracker) capabilities() types.Capabilities { 1097 span, _ := fc.trace("capabilities") 1098 defer span.Finish() 1099 var caps types.Capabilities 1100 caps.SetBlockDeviceHotplugSupport() 1101 1102 return caps 1103 } 1104 1105 func (fc *firecracker) hypervisorConfig() HypervisorConfig { 1106 return fc.config 1107 } 1108 1109 func (fc *firecracker) resizeMemory(reqMemMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, memoryDevice, error) { 1110 return 0, memoryDevice{}, nil 1111 } 1112 1113 func (fc *firecracker) resizeVCPUs(reqVCPUs uint32) (currentVCPUs uint32, newVCPUs uint32, err error) { 1114 return 0, 0, nil 1115 } 1116 1117 // This is used to apply cgroup information on the host. 1118 // 1119 // As suggested by https://github.com/firecracker-microvm/firecracker/issues/718, 1120 // let's use `ps -T -p <pid>` to get fc vcpu info. 1121 func (fc *firecracker) getThreadIDs() (vcpuThreadIDs, error) { 1122 var vcpuInfo vcpuThreadIDs 1123 1124 vcpuInfo.vcpus = make(map[int]int) 1125 parent, err := utils.NewProc(fc.info.PID) 1126 if err != nil { 1127 return vcpuInfo, err 1128 } 1129 children, err := parent.Children() 1130 if err != nil { 1131 return vcpuInfo, err 1132 } 1133 for _, child := range children { 1134 comm, err := child.Comm() 1135 if err != nil { 1136 return vcpuInfo, errors.New("Invalid fc thread info") 1137 } 1138 if !strings.HasPrefix(comm, "fc_vcpu") { 1139 continue 1140 } 1141 cpus := strings.SplitAfter(comm, "fc_vcpu") 1142 if len(cpus) != 2 { 1143 return vcpuInfo, errors.Errorf("Invalid fc thread info: %v", comm) 1144 } 1145 cpuID, err := strconv.ParseInt(cpus[1], 10, 32) 1146 if err != nil { 1147 return vcpuInfo, errors.Wrapf(err, "Invalid fc thread info: %v", comm) 1148 } 1149 vcpuInfo.vcpus[int(cpuID)] = child.PID 1150 } 1151 1152 return vcpuInfo, nil 1153 } 1154 1155 func (fc *firecracker) cleanup() error { 1156 fc.cleanupJail() 1157 return nil 1158 } 1159 1160 func (fc *firecracker) getPids() []int { 1161 return []int{fc.info.PID} 1162 } 1163 1164 func (fc *firecracker) fromGrpc(ctx context.Context, hypervisorConfig *HypervisorConfig, j []byte) error { 1165 return errors.New("firecracker is not supported by VM cache") 1166 } 1167 1168 func (fc *firecracker) toGrpc() ([]byte, error) { 1169 return nil, errors.New("firecracker is not supported by VM cache") 1170 } 1171 1172 func (fc *firecracker) save() (s persistapi.HypervisorState) { 1173 s.Pid = fc.info.PID 1174 s.Type = string(FirecrackerHypervisor) 1175 return 1176 } 1177 1178 func (fc *firecracker) load(s persistapi.HypervisorState) { 1179 fc.info.PID = s.Pid 1180 } 1181 1182 func (fc *firecracker) check() error { 1183 if err := syscall.Kill(fc.info.PID, syscall.Signal(0)); err != nil { 1184 return errors.Wrapf(err, "failed to ping fc process") 1185 } 1186 1187 return nil 1188 } 1189 1190 func (fc *firecracker) generateSocket(id string, useVsock bool) (interface{}, error) { 1191 if !useVsock { 1192 return nil, fmt.Errorf("Can't start firecracker: vsocks is disabled") 1193 } 1194 1195 fc.Logger().Debug("Using hybrid-vsock endpoint") 1196 udsPath := filepath.Join(fc.jailerRoot, defaultHybridVSocketName) 1197 1198 return types.HybridVSock{ 1199 UdsPath: udsPath, 1200 Port: uint32(vSockPort), 1201 }, nil 1202 } 1203 1204 func (fc *firecracker) watchConsole() (*os.File, error) { 1205 master, slave, err := console.NewPty() 1206 if err != nil { 1207 fc.Logger().WithField("Error create pseudo tty", err).Debug() 1208 return nil, err 1209 } 1210 1211 stdio, err := os.OpenFile(slave, syscall.O_RDWR, 0700) 1212 if err != nil { 1213 fc.Logger().WithError(err).Debugf("open pseudo tty %s", slave) 1214 return nil, err 1215 } 1216 1217 go func() { 1218 scanner := bufio.NewScanner(master) 1219 for scanner.Scan() { 1220 fc.Logger().WithFields(logrus.Fields{ 1221 "sandbox": fc.id, 1222 "vmconsole": scanner.Text(), 1223 }).Infof("reading guest console") 1224 } 1225 1226 if err := scanner.Err(); err != nil { 1227 if err == io.EOF { 1228 fc.Logger().Info("console watcher quits") 1229 } else { 1230 fc.Logger().WithError(err).Error("Failed to read guest console") 1231 } 1232 } 1233 }() 1234 1235 return stdio, nil 1236 } 1237 1238 func (fc *firecracker) getVirtDriveOffset() int { 1239 return fc.hotplugDriveOffset 1240 }