gitee.com/leisunstar/runtime@v0.0.0-20200521203717-5cef3e7b53f9/virtcontainers/fc.go (about) 1 // Copyright (c) 2018 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 6 package virtcontainers 7 8 import ( 9 "bufio" 10 "context" 11 "encoding/json" 12 "fmt" 13 "io" 14 "io/ioutil" 15 "net" 16 "net/http" 17 "os" 18 "os/exec" 19 "path/filepath" 20 "strconv" 21 "strings" 22 "sync" 23 "syscall" 24 "time" 25 26 "github.com/containerd/fifo" 27 httptransport "github.com/go-openapi/runtime/client" 28 "github.com/go-openapi/strfmt" 29 kataclient "github.com/kata-containers/agent/protocols/client" 30 persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api" 31 "github.com/kata-containers/runtime/virtcontainers/pkg/firecracker/client" 32 models "github.com/kata-containers/runtime/virtcontainers/pkg/firecracker/client/models" 33 ops "github.com/kata-containers/runtime/virtcontainers/pkg/firecracker/client/operations" 34 "github.com/opencontainers/selinux/go-selinux/label" 35 opentracing "github.com/opentracing/opentracing-go" 36 "github.com/pkg/errors" 37 "github.com/sirupsen/logrus" 38 39 "github.com/blang/semver" 40 "github.com/containerd/console" 41 "github.com/kata-containers/runtime/virtcontainers/device/config" 42 "github.com/kata-containers/runtime/virtcontainers/types" 43 "github.com/kata-containers/runtime/virtcontainers/utils" 44 ) 45 46 type vmmState uint8 47 48 const ( 49 notReady vmmState = iota 50 cfReady 51 vmReady 52 ) 53 54 const ( 55 //fcTimeout is the maximum amount of time in seconds to wait for the VMM to respond 56 fcTimeout = 10 57 fcSocket = "firecracker.socket" 58 //Name of the files within jailer root 59 //Having predefined names helps with cleanup 60 fcKernel = "vmlinux" 61 fcRootfs = "rootfs" 62 fcStopSandboxTimeout = 15 63 // This indicates the number of block devices that can be attached to the 64 // firecracker guest VM. 65 // We attach a pool of placeholder drives before the guest has started, and then 66 // patch the replace placeholder drives with drives with actual contents. 67 fcDiskPoolSize = 8 68 defaultHybridVSocketName = "kata.hvsock" 69 70 // This is the first usable vsock context ID. All the vsocks can use the same 71 // ID, since it's only used in the guest. 72 defaultGuestVSockCID = int64(0x3) 73 74 // This is related to firecracker logging scheme 75 fcLogFifo = "logs.fifo" 76 fcMetricsFifo = "metrics.fifo" 77 78 defaultFcConfig = "fcConfig.json" 79 // storagePathSuffix mirrors persist/fs/fs.go:storagePathSuffix 80 storagePathSuffix = "vc" 81 ) 82 83 // Specify the minimum version of firecracker supported 84 var fcMinSupportedVersion = semver.MustParse("0.21.1") 85 86 var fcKernelParams = append(commonVirtioblkKernelRootParams, []Param{ 87 // The boot source is the first partition of the first block device added 88 {"pci", "off"}, 89 {"reboot", "k"}, 90 {"panic", "1"}, 91 {"iommu", "off"}, 92 {"net.ifnames", "0"}, 93 {"random.trust_cpu", "on"}, 94 95 // Firecracker doesn't support ACPI 96 // Fix kernel error "ACPI BIOS Error (bug)" 97 {"acpi", "off"}, 98 }...) 99 100 func (s vmmState) String() string { 101 switch s { 102 case notReady: 103 return "FC not ready" 104 case cfReady: 105 return "FC configure ready" 106 case vmReady: 107 return "FC VM ready" 108 } 109 110 return "" 111 } 112 113 // FirecrackerInfo contains information related to the hypervisor that we 114 // want to store on disk 115 type FirecrackerInfo struct { 116 PID int 117 Version string 118 } 119 120 type firecrackerState struct { 121 sync.RWMutex 122 state vmmState 123 } 124 125 func (s *firecrackerState) set(state vmmState) { 126 s.Lock() 127 defer s.Unlock() 128 129 s.state = state 130 } 131 132 // firecracker is an Hypervisor interface implementation for the firecracker VMM. 133 type firecracker struct { 134 id string //Unique ID per pod. Normally maps to the sandbox id 135 vmPath string //All jailed VM assets need to be under this 136 chrootBaseDir string //chroot base for the jailer 137 jailerRoot string 138 socketPath string 139 netNSPath string 140 uid string //UID and GID to be used for the VMM 141 gid string 142 143 info FirecrackerInfo 144 145 firecrackerd *exec.Cmd //Tracks the firecracker process itself 146 connection *client.Firecracker //Tracks the current active connection 147 148 ctx context.Context 149 config HypervisorConfig 150 pendingDevices []firecrackerDevice // Devices to be added before the FC VM ready 151 152 state firecrackerState 153 jailed bool //Set to true if jailer is enabled 154 stateful bool //Set to true if running with shimv2 155 156 fcConfigPath string 157 fcConfig *types.FcConfig // Parameters configured before VM starts 158 } 159 160 type firecrackerDevice struct { 161 dev interface{} 162 devType deviceType 163 } 164 165 // Logger returns a logrus logger appropriate for logging firecracker messages 166 func (fc *firecracker) Logger() *logrus.Entry { 167 return virtLog.WithField("subsystem", "firecracker") 168 } 169 170 func (fc *firecracker) trace(name string) (opentracing.Span, context.Context) { 171 if fc.ctx == nil { 172 fc.Logger().WithField("type", "bug").Error("trace called before context set") 173 fc.ctx = context.Background() 174 } 175 176 span, ctx := opentracing.StartSpanFromContext(fc.ctx, name) 177 178 span.SetTag("subsystem", "hypervisor") 179 span.SetTag("type", "firecracker") 180 181 return span, ctx 182 } 183 184 //At some cases, when sandbox id is too long, it will incur error of overlong 185 //firecracker API unix socket(fc.socketPath). 186 //In Linux, sun_path could maximumly contains 108 bytes in size. 187 //(http://man7.org/linux/man-pages/man7/unix.7.html) 188 func (fc *firecracker) truncateID(id string) string { 189 if len(id) > 32 { 190 //truncate the id to only leave the size of UUID(128bit). 191 return id[:32] 192 } 193 194 return id 195 } 196 197 // For firecracker this call only sets the internal structure up. 198 // The sandbox will be created and started through startSandbox(). 199 func (fc *firecracker) createSandbox(ctx context.Context, id string, networkNS NetworkNamespace, hypervisorConfig *HypervisorConfig, stateful bool) error { 200 fc.ctx = ctx 201 202 span, _ := fc.trace("createSandbox") 203 defer span.Finish() 204 205 //TODO: check validity of the hypervisor config provided 206 //https://github.com/kata-containers/runtime/issues/1065 207 fc.id = fc.truncateID(id) 208 fc.state.set(notReady) 209 fc.config = *hypervisorConfig 210 fc.stateful = stateful 211 212 // When running with jailer all resources need to be under 213 // a specific location and that location needs to have 214 // exec permission (i.e. should not be mounted noexec, e.g. /run, /var/run) 215 // Also unix domain socket names have a hard limit 216 // #define UNIX_PATH_MAX 108 217 // Keep it short and live within the jailer expected paths 218 // <chroot_base>/<exec_file_name>/<id>/ 219 // Also jailer based on the id implicitly sets up cgroups under 220 // <cgroups_base>/<exec_file_name>/<id>/ 221 hypervisorName := filepath.Base(hypervisorConfig.HypervisorPath) 222 //fs.RunStoragePath cannot be used as we need exec perms 223 fc.chrootBaseDir = filepath.Join("/run", storagePathSuffix) 224 225 fc.vmPath = filepath.Join(fc.chrootBaseDir, hypervisorName, fc.id) 226 fc.jailerRoot = filepath.Join(fc.vmPath, "root") // auto created by jailer 227 228 // Firecracker and jailer automatically creates default API socket under /run 229 // with the name of "firecracker.socket" 230 fc.socketPath = filepath.Join(fc.jailerRoot, "run", fcSocket) 231 232 // So we need to repopulate this at startSandbox where it is valid 233 fc.netNSPath = networkNS.NetNsPath 234 235 // Till we create lower privileged kata user run as root 236 // https://github.com/kata-containers/runtime/issues/1869 237 fc.uid = "0" 238 fc.gid = "0" 239 240 fc.fcConfig = &types.FcConfig{} 241 fc.fcConfigPath = filepath.Join(fc.vmPath, defaultFcConfig) 242 return nil 243 } 244 245 func (fc *firecracker) newFireClient() *client.Firecracker { 246 span, _ := fc.trace("newFireClient") 247 defer span.Finish() 248 httpClient := client.NewHTTPClient(strfmt.NewFormats()) 249 250 socketTransport := &http.Transport{ 251 DialContext: func(ctx context.Context, network, path string) (net.Conn, error) { 252 addr, err := net.ResolveUnixAddr("unix", fc.socketPath) 253 if err != nil { 254 return nil, err 255 } 256 257 return net.DialUnix("unix", nil, addr) 258 }, 259 } 260 261 transport := httptransport.New(client.DefaultHost, client.DefaultBasePath, client.DefaultSchemes) 262 transport.SetLogger(fc.Logger()) 263 transport.SetDebug(fc.Logger().Logger.Level == logrus.DebugLevel) 264 transport.Transport = socketTransport 265 httpClient.SetTransport(transport) 266 267 return httpClient 268 } 269 270 func (fc *firecracker) vmRunning() bool { 271 resp, err := fc.client().Operations.DescribeInstance(nil) 272 if err != nil { 273 fc.Logger().WithError(err).Error("getting vm status failed") 274 return false 275 } 276 277 // Be explicit 278 switch *resp.Payload.State { 279 case models.InstanceInfoStateStarting: 280 // Unsure what we should do here 281 fc.Logger().WithField("unexpected-state", models.InstanceInfoStateStarting).Debug("vmRunning") 282 return false 283 case models.InstanceInfoStateRunning: 284 return true 285 case models.InstanceInfoStateUninitialized: 286 return false 287 default: 288 return false 289 } 290 } 291 292 func (fc *firecracker) getVersionNumber() (string, error) { 293 args := []string{"--version"} 294 checkCMD := exec.Command(fc.config.HypervisorPath, args...) 295 296 data, err := checkCMD.Output() 297 if err != nil { 298 return "", fmt.Errorf("Running checking FC version command failed: %v", err) 299 } 300 301 var version string 302 fields := strings.Split(string(data), " ") 303 if len(fields) > 1 { 304 // The output format of `Firecracker --verion` is as follows 305 // Firecracker v0.21.1 306 version = strings.TrimPrefix(strings.TrimSpace(fields[1]), "v") 307 return version, nil 308 } 309 310 return "", errors.New("getting FC version failed, the output is malformed") 311 } 312 313 func (fc *firecracker) checkVersion(version string) error { 314 v, err := semver.Make(version) 315 if err != nil { 316 return fmt.Errorf("Malformed firecracker version: %v", err) 317 } 318 319 if v.LT(fcMinSupportedVersion) { 320 return fmt.Errorf("version %v is not supported. Minimum supported version of firecracker is %v", v.String(), fcMinSupportedVersion.String()) 321 } 322 323 return nil 324 } 325 326 // waitVMMRunning will wait for timeout seconds for the VMM to be up and running. 327 func (fc *firecracker) waitVMMRunning(timeout int) error { 328 span, _ := fc.trace("wait VMM to be running") 329 defer span.Finish() 330 331 if timeout < 0 { 332 return fmt.Errorf("Invalid timeout %ds", timeout) 333 } 334 335 timeStart := time.Now() 336 for { 337 if fc.vmRunning() { 338 return nil 339 } 340 341 if int(time.Since(timeStart).Seconds()) > timeout { 342 return fmt.Errorf("Failed to connect to firecrackerinstance (timeout %ds)", timeout) 343 } 344 345 time.Sleep(time.Duration(10) * time.Millisecond) 346 } 347 } 348 349 func (fc *firecracker) fcInit(timeout int) error { 350 span, _ := fc.trace("fcInit") 351 defer span.Finish() 352 353 var err error 354 //FC version set and check 355 if fc.info.Version, err = fc.getVersionNumber(); err != nil { 356 return err 357 } 358 359 if err := fc.checkVersion(fc.info.Version); err != nil { 360 return err 361 } 362 363 var cmd *exec.Cmd 364 var args []string 365 366 if fc.fcConfigPath, err = fc.fcJailResource(fc.fcConfigPath, defaultFcConfig); err != nil { 367 return err 368 } 369 370 if !fc.config.Debug && fc.stateful { 371 args = append(args, "--daemonize") 372 } 373 374 //https://github.com/firecracker-microvm/firecracker/blob/master/docs/jailer.md#jailer-usage 375 //--seccomp-level specifies whether seccomp filters should be installed and how restrictive they should be. Possible values are: 376 //0 : disabled. 377 //1 : basic filtering. This prohibits syscalls not whitelisted by Firecracker. 378 //2 (default): advanced filtering. This adds further checks on some of the parameters of the allowed syscalls. 379 if fc.jailed { 380 jailedArgs := []string{ 381 "--id", fc.id, 382 "--node", "0", //FIXME: Comprehend NUMA topology or explicit ignore 383 "--exec-file", fc.config.HypervisorPath, 384 "--uid", "0", //https://github.com/kata-containers/runtime/issues/1869 385 "--gid", "0", 386 "--chroot-base-dir", fc.chrootBaseDir, 387 } 388 args = append(args, jailedArgs...) 389 if fc.netNSPath != "" { 390 args = append(args, "--netns", fc.netNSPath) 391 } 392 args = append(args, "--", "--config-file", fc.fcConfigPath) 393 394 cmd = exec.Command(fc.config.JailerPath, args...) 395 } else { 396 args = append(args, 397 "--api-sock", fc.socketPath, 398 "--config-file", fc.fcConfigPath) 399 cmd = exec.Command(fc.config.HypervisorPath, args...) 400 } 401 402 if fc.config.Debug && fc.stateful { 403 stdin, err := fc.watchConsole() 404 if err != nil { 405 return err 406 } 407 408 cmd.Stderr = stdin 409 cmd.Stdout = stdin 410 } 411 412 fc.Logger().WithField("hypervisor args", args).Debug() 413 fc.Logger().WithField("hypervisor cmd", cmd).Debug() 414 415 fc.Logger().Info("Starting VM") 416 if err := cmd.Start(); err != nil { 417 fc.Logger().WithField("Error starting firecracker", err).Debug() 418 return err 419 } 420 421 fc.info.PID = cmd.Process.Pid 422 fc.firecrackerd = cmd 423 fc.connection = fc.newFireClient() 424 425 if err := fc.waitVMMRunning(timeout); err != nil { 426 fc.Logger().WithField("fcInit failed:", err).Debug() 427 return err 428 } 429 return nil 430 } 431 432 func (fc *firecracker) fcEnd() (err error) { 433 span, _ := fc.trace("fcEnd") 434 defer span.Finish() 435 436 fc.Logger().Info("Stopping firecracker VM") 437 438 defer func() { 439 if err != nil { 440 fc.Logger().Info("fcEnd failed") 441 } else { 442 fc.Logger().Info("Firecracker VM stopped") 443 } 444 }() 445 446 pid := fc.info.PID 447 448 // Send a SIGTERM to the VM process to try to stop it properly 449 if err = syscall.Kill(pid, syscall.SIGTERM); err != nil { 450 if err == syscall.ESRCH { 451 return nil 452 } 453 return err 454 } 455 456 // Wait for the VM process to terminate 457 tInit := time.Now() 458 for { 459 if err = syscall.Kill(pid, syscall.Signal(0)); err != nil { 460 return nil 461 } 462 463 if time.Since(tInit).Seconds() >= fcStopSandboxTimeout { 464 fc.Logger().Warnf("VM still running after waiting %ds", fcStopSandboxTimeout) 465 break 466 } 467 468 // Let's avoid to run a too busy loop 469 time.Sleep(time.Duration(50) * time.Millisecond) 470 } 471 472 // Let's try with a hammer now, a SIGKILL should get rid of the 473 // VM process. 474 return syscall.Kill(pid, syscall.SIGKILL) 475 } 476 477 func (fc *firecracker) client() *client.Firecracker { 478 span, _ := fc.trace("client") 479 defer span.Finish() 480 481 if fc.connection == nil { 482 fc.connection = fc.newFireClient() 483 } 484 485 return fc.connection 486 } 487 488 func (fc *firecracker) createJailedDrive(name string) (string, error) { 489 // Don't bind mount the resource, just create a raw file 490 // that can be bind-mounted later 491 r := filepath.Join(fc.jailerRoot, name) 492 f, err := os.Create(r) 493 if err != nil { 494 return "", err 495 } 496 f.Close() 497 498 if fc.jailed { 499 // use path relative to the jail 500 r = filepath.Join("/", name) 501 } 502 503 return r, nil 504 } 505 506 // when running with jailer, firecracker binary will firstly be copied into fc.jailerRoot, 507 // and then being executed there. Therefore we need to ensure fc.JailerRoot has exec permissions. 508 func (fc *firecracker) fcRemountJailerRootWithExec() error { 509 if err := bindMount(context.Background(), fc.jailerRoot, fc.jailerRoot, false, "shared"); err != nil { 510 fc.Logger().WithField("JailerRoot", fc.jailerRoot).Errorf("bindMount failed: %v", err) 511 return err 512 } 513 514 // /run is normally mounted with rw, nosuid(MS_NOSUID), relatime(MS_RELATIME), noexec(MS_NOEXEC). 515 // we re-mount jailerRoot to deliberately leave out MS_NOEXEC. 516 if err := remount(context.Background(), syscall.MS_NOSUID|syscall.MS_RELATIME, fc.jailerRoot); err != nil { 517 fc.Logger().WithField("JailerRoot", fc.jailerRoot).Errorf("Re-mount failed: %v", err) 518 return err 519 } 520 521 return nil 522 } 523 524 func (fc *firecracker) fcJailResource(src, dst string) (string, error) { 525 if src == "" || dst == "" { 526 return "", fmt.Errorf("fcJailResource: invalid jail locations: src:%v, dst:%v", 527 src, dst) 528 } 529 jailedLocation := filepath.Join(fc.jailerRoot, dst) 530 if err := bindMount(context.Background(), src, jailedLocation, false, "slave"); err != nil { 531 fc.Logger().WithField("bindMount failed", err).Error() 532 return "", err 533 } 534 535 if !fc.jailed { 536 return jailedLocation, nil 537 } 538 539 // This is the path within the jailed root 540 absPath := filepath.Join("/", dst) 541 return absPath, nil 542 } 543 544 func (fc *firecracker) fcSetBootSource(path, params string) error { 545 span, _ := fc.trace("fcSetBootSource") 546 defer span.Finish() 547 fc.Logger().WithFields(logrus.Fields{"kernel-path": path, 548 "kernel-params": params}).Debug("fcSetBootSource") 549 550 kernelPath, err := fc.fcJailResource(path, fcKernel) 551 if err != nil { 552 return err 553 } 554 555 src := &models.BootSource{ 556 KernelImagePath: &kernelPath, 557 BootArgs: params, 558 } 559 560 fc.fcConfig.BootSource = src 561 562 return nil 563 } 564 565 func (fc *firecracker) fcSetVMRootfs(path string) error { 566 span, _ := fc.trace("fcSetVMRootfs") 567 defer span.Finish() 568 569 jailedRootfs, err := fc.fcJailResource(path, fcRootfs) 570 if err != nil { 571 return err 572 } 573 574 driveID := "rootfs" 575 isReadOnly := true 576 //Add it as a regular block device 577 //This allows us to use a partitoned root block device 578 isRootDevice := false 579 // This is the path within the jailed root 580 drive := &models.Drive{ 581 DriveID: &driveID, 582 IsReadOnly: &isReadOnly, 583 IsRootDevice: &isRootDevice, 584 PathOnHost: &jailedRootfs, 585 } 586 587 fc.fcConfig.Drives = append(fc.fcConfig.Drives, drive) 588 589 return nil 590 } 591 592 func (fc *firecracker) fcSetVMBaseConfig(mem int64, vcpus int64, htEnabled bool) { 593 span, _ := fc.trace("fcSetVMBaseConfig") 594 defer span.Finish() 595 fc.Logger().WithFields(logrus.Fields{"mem": mem, 596 "vcpus": vcpus, 597 "htEnabled": htEnabled}).Debug("fcSetVMBaseConfig") 598 599 cfg := &models.MachineConfiguration{ 600 HtEnabled: &htEnabled, 601 MemSizeMib: &mem, 602 VcpuCount: &vcpus, 603 } 604 605 fc.fcConfig.MachineConfig = cfg 606 } 607 608 func (fc *firecracker) fcSetLogger() error { 609 span, _ := fc.trace("fcSetLogger") 610 defer span.Finish() 611 612 fcLogLevel := "Error" 613 if fc.config.Debug { 614 fcLogLevel = "Debug" 615 } 616 617 // listen to log fifo file and transfer error info 618 jailedLogFifo, err := fc.fcListenToFifo(fcLogFifo) 619 if err != nil { 620 return fmt.Errorf("Failed setting log: %s", err) 621 } 622 623 // listen to metrics file and transfer error info 624 jailedMetricsFifo, err := fc.fcListenToFifo(fcMetricsFifo) 625 if err != nil { 626 return fmt.Errorf("Failed setting log: %s", err) 627 } 628 629 fc.fcConfig.Logger = &models.Logger{ 630 Level: &fcLogLevel, 631 LogFifo: &jailedLogFifo, 632 MetricsFifo: &jailedMetricsFifo, 633 } 634 635 return err 636 } 637 638 func (fc *firecracker) fcListenToFifo(fifoName string) (string, error) { 639 fcFifoPath := filepath.Join(fc.vmPath, fifoName) 640 fcFifo, err := fifo.OpenFifo(context.Background(), fcFifoPath, syscall.O_CREAT|syscall.O_RDONLY|syscall.O_NONBLOCK, 0) 641 if err != nil { 642 return "", fmt.Errorf("Failed to open/create fifo file %s", err) 643 } 644 645 jailedFifoPath, err := fc.fcJailResource(fcFifoPath, fifoName) 646 if err != nil { 647 return "", err 648 } 649 650 go func() { 651 scanner := bufio.NewScanner(fcFifo) 652 for scanner.Scan() { 653 fc.Logger().WithFields(logrus.Fields{ 654 "fifoName": fifoName, 655 "contents": scanner.Text()}).Error("firecracker failed") 656 } 657 658 if err := scanner.Err(); err != nil { 659 fc.Logger().WithError(err).Errorf("Failed reading firecracker fifo file") 660 } 661 662 if err := fcFifo.Close(); err != nil { 663 fc.Logger().WithError(err).Errorf("Failed closing firecracker fifo file") 664 } 665 }() 666 667 return jailedFifoPath, nil 668 } 669 670 func (fc *firecracker) fcInitConfiguration() error { 671 // Firecracker API socket(firecracker.socket) is automatically created 672 // under /run dir. 673 err := os.MkdirAll(filepath.Join(fc.jailerRoot, "run"), DirMode) 674 if err != nil { 675 return err 676 } 677 defer func() { 678 if err != nil { 679 if err := os.RemoveAll(fc.vmPath); err != nil { 680 fc.Logger().WithError(err).Error("Fail to clean up vm directory") 681 } 682 } 683 }() 684 685 if fc.config.JailerPath != "" { 686 fc.jailed = true 687 if err := fc.fcRemountJailerRootWithExec(); err != nil { 688 return err 689 } 690 } 691 692 fc.fcSetVMBaseConfig(int64(fc.config.MemorySize), 693 int64(fc.config.NumVCPUs), false) 694 695 kernelPath, err := fc.config.KernelAssetPath() 696 if err != nil { 697 return err 698 } 699 700 if fc.config.Debug && fc.stateful { 701 fcKernelParams = append(fcKernelParams, Param{"console", "ttyS0"}) 702 } else { 703 fcKernelParams = append(fcKernelParams, []Param{ 704 {"8250.nr_uarts", "0"}, 705 // Tell agent where to send the logs 706 {"agent.log_vport", fmt.Sprintf("%d", vSockLogsPort)}, 707 }...) 708 } 709 710 kernelParams := append(fc.config.KernelParams, fcKernelParams...) 711 strParams := SerializeParams(kernelParams, "=") 712 formattedParams := strings.Join(strParams, " ") 713 if err := fc.fcSetBootSource(kernelPath, formattedParams); err != nil { 714 return err 715 } 716 717 image, err := fc.config.InitrdAssetPath() 718 if err != nil { 719 return err 720 } 721 722 if image == "" { 723 image, err = fc.config.ImageAssetPath() 724 if err != nil { 725 return err 726 } 727 } 728 729 if err := fc.fcSetVMRootfs(image); err != nil { 730 return err 731 } 732 733 if err := fc.createDiskPool(); err != nil { 734 return err 735 } 736 737 if err := fc.fcSetLogger(); err != nil { 738 return err 739 } 740 741 fc.state.set(cfReady) 742 for _, d := range fc.pendingDevices { 743 if err := fc.addDevice(d.dev, d.devType); err != nil { 744 return err 745 } 746 } 747 748 return nil 749 } 750 751 // startSandbox will start the hypervisor for the given sandbox. 752 // In the context of firecracker, this will start the hypervisor, 753 // for configuration, but not yet start the actual virtual machine 754 func (fc *firecracker) startSandbox(timeout int) error { 755 span, _ := fc.trace("startSandbox") 756 defer span.Finish() 757 758 if err := fc.fcInitConfiguration(); err != nil { 759 return err 760 } 761 762 data, errJSON := json.MarshalIndent(fc.fcConfig, "", "\t") 763 if errJSON != nil { 764 return errJSON 765 } 766 767 if err := ioutil.WriteFile(fc.fcConfigPath, data, 0640); err != nil { 768 return err 769 } 770 771 var err error 772 defer func() { 773 if err != nil { 774 fc.fcEnd() 775 } 776 }() 777 778 // This needs to be done as late as possible, since all processes that 779 // are executed by kata-runtime after this call, run with the SELinux 780 // label. If these processes require privileged, we do not want to run 781 // them under confinement. 782 if err := label.SetProcessLabel(fc.config.SELinuxProcessLabel); err != nil { 783 return err 784 } 785 defer label.SetProcessLabel("") 786 787 err = fc.fcInit(fcTimeout) 788 if err != nil { 789 return err 790 } 791 792 // make sure 'others' don't have access to this socket 793 err = os.Chmod(filepath.Join(fc.jailerRoot, defaultHybridVSocketName), 0640) 794 if err != nil { 795 return fmt.Errorf("Could not change socket permissions: %v", err) 796 } 797 798 fc.state.set(vmReady) 799 return nil 800 } 801 802 func fcDriveIndexToID(i int) string { 803 return "drive_" + strconv.Itoa(i) 804 } 805 806 func (fc *firecracker) createDiskPool() error { 807 span, _ := fc.trace("createDiskPool") 808 defer span.Finish() 809 810 for i := 0; i < fcDiskPoolSize; i++ { 811 driveID := fcDriveIndexToID(i) 812 isReadOnly := false 813 isRootDevice := false 814 815 // Create a temporary file as a placeholder backend for the drive 816 jailedDrive, err := fc.createJailedDrive(driveID) 817 if err != nil { 818 return err 819 } 820 821 drive := &models.Drive{ 822 DriveID: &driveID, 823 IsReadOnly: &isReadOnly, 824 IsRootDevice: &isRootDevice, 825 PathOnHost: &jailedDrive, 826 } 827 828 fc.fcConfig.Drives = append(fc.fcConfig.Drives, drive) 829 } 830 831 return nil 832 } 833 834 func (fc *firecracker) umountResource(jailedPath string) { 835 hostPath := filepath.Join(fc.jailerRoot, jailedPath) 836 fc.Logger().WithField("resource", hostPath).Debug("Unmounting resource") 837 err := syscall.Unmount(hostPath, syscall.MNT_DETACH) 838 if err != nil { 839 fc.Logger().WithError(err).Error("Failed to umount resource") 840 } 841 } 842 843 // cleanup all jail artifacts 844 func (fc *firecracker) cleanupJail() { 845 span, _ := fc.trace("cleanupJail") 846 defer span.Finish() 847 848 fc.umountResource(fcKernel) 849 fc.umountResource(fcRootfs) 850 fc.umountResource(fcLogFifo) 851 fc.umountResource(fcMetricsFifo) 852 fc.umountResource(defaultFcConfig) 853 // if running with jailer, we also need to umount fc.jailerRoot 854 if fc.config.JailerPath != "" { 855 if err := syscall.Unmount(fc.jailerRoot, syscall.MNT_DETACH); err != nil { 856 fc.Logger().WithField("JailerRoot", fc.jailerRoot).WithError(err).Error("Failed to umount") 857 } 858 } 859 860 fc.Logger().WithField("cleaningJail", fc.vmPath).Info() 861 if err := os.RemoveAll(fc.vmPath); err != nil { 862 fc.Logger().WithField("cleanupJail failed", err).Error() 863 } 864 } 865 866 // stopSandbox will stop the Sandbox's VM. 867 func (fc *firecracker) stopSandbox() (err error) { 868 span, _ := fc.trace("stopSandbox") 869 defer span.Finish() 870 871 return fc.fcEnd() 872 } 873 874 func (fc *firecracker) pauseSandbox() error { 875 return nil 876 } 877 878 func (fc *firecracker) saveSandbox() error { 879 return nil 880 } 881 882 func (fc *firecracker) resumeSandbox() error { 883 return nil 884 } 885 886 func (fc *firecracker) fcAddVsock(hvs types.HybridVSock) { 887 span, _ := fc.trace("fcAddVsock") 888 defer span.Finish() 889 890 udsPath := hvs.UdsPath 891 if fc.jailed { 892 udsPath = filepath.Join("/", defaultHybridVSocketName) 893 } 894 895 vsockID := "root" 896 ctxID := defaultGuestVSockCID 897 vsock := &models.Vsock{ 898 GuestCid: &ctxID, 899 UdsPath: &udsPath, 900 VsockID: &vsockID, 901 } 902 903 fc.fcConfig.Vsock = vsock 904 } 905 906 func (fc *firecracker) fcAddNetDevice(endpoint Endpoint) { 907 span, _ := fc.trace("fcAddNetDevice") 908 defer span.Finish() 909 910 ifaceID := endpoint.Name() 911 ifaceCfg := &models.NetworkInterface{ 912 AllowMmdsRequests: false, 913 GuestMac: endpoint.HardwareAddr(), 914 IfaceID: &ifaceID, 915 HostDevName: &endpoint.NetworkPair().TapInterface.TAPIface.Name, 916 } 917 918 fc.fcConfig.NetworkInterfaces = append(fc.fcConfig.NetworkInterfaces, ifaceCfg) 919 } 920 921 func (fc *firecracker) fcAddBlockDrive(drive config.BlockDrive) error { 922 span, _ := fc.trace("fcAddBlockDrive") 923 defer span.Finish() 924 925 driveID := drive.ID 926 isReadOnly := false 927 isRootDevice := false 928 929 jailedDrive, err := fc.fcJailResource(drive.File, driveID) 930 if err != nil { 931 fc.Logger().WithField("fcAddBlockDrive failed", err).Error() 932 return err 933 } 934 driveFc := &models.Drive{ 935 DriveID: &driveID, 936 IsReadOnly: &isReadOnly, 937 IsRootDevice: &isRootDevice, 938 PathOnHost: &jailedDrive, 939 } 940 941 fc.fcConfig.Drives = append(fc.fcConfig.Drives, driveFc) 942 943 return nil 944 } 945 946 // Firecracker supports replacing the host drive used once the VM has booted up 947 func (fc *firecracker) fcUpdateBlockDrive(path, id string) error { 948 span, _ := fc.trace("fcUpdateBlockDrive") 949 defer span.Finish() 950 951 // Use the global block index as an index into the pool of the devices 952 // created for firecracker. 953 driveParams := ops.NewPatchGuestDriveByIDParams() 954 driveParams.SetDriveID(id) 955 956 driveFc := &models.PartialDrive{ 957 DriveID: &id, 958 PathOnHost: &path, //This is the only property that can be modified 959 } 960 961 driveParams.SetBody(driveFc) 962 if _, err := fc.client().Operations.PatchGuestDriveByID(driveParams); err != nil { 963 return err 964 } 965 966 return nil 967 } 968 969 // addDevice will add extra devices to firecracker. Limited to configure before the 970 // virtual machine starts. Devices include drivers and network interfaces only. 971 func (fc *firecracker) addDevice(devInfo interface{}, devType deviceType) error { 972 span, _ := fc.trace("addDevice") 973 defer span.Finish() 974 975 fc.state.RLock() 976 defer fc.state.RUnlock() 977 978 if fc.state.state == notReady { 979 dev := firecrackerDevice{ 980 dev: devInfo, 981 devType: devType, 982 } 983 fc.Logger().Info("FC not ready, queueing device") 984 fc.pendingDevices = append(fc.pendingDevices, dev) 985 return nil 986 } 987 988 var err error 989 switch v := devInfo.(type) { 990 case Endpoint: 991 fc.Logger().WithField("device-type-endpoint", devInfo).Info("Adding device") 992 fc.fcAddNetDevice(v) 993 case config.BlockDrive: 994 fc.Logger().WithField("device-type-blockdrive", devInfo).Info("Adding device") 995 err = fc.fcAddBlockDrive(v) 996 case types.HybridVSock: 997 fc.Logger().WithField("device-type-hybrid-vsock", devInfo).Info("Adding device") 998 fc.fcAddVsock(v) 999 default: 1000 fc.Logger().WithField("unknown-device-type", devInfo).Error("Adding device") 1001 } 1002 1003 return err 1004 } 1005 1006 // hotplugBlockDevice supported in Firecracker VMM 1007 // hot add or remove a block device. 1008 func (fc *firecracker) hotplugBlockDevice(drive config.BlockDrive, op operation) (interface{}, error) { 1009 var path string 1010 var err error 1011 driveID := fcDriveIndexToID(drive.Index) 1012 1013 if op == addDevice { 1014 //The drive placeholder has to exist prior to Update 1015 path, err = fc.fcJailResource(drive.File, driveID) 1016 if err != nil { 1017 fc.Logger().WithError(err).WithField("resource", drive.File).Error("Could not jail resource") 1018 return nil, err 1019 } 1020 } else { 1021 // umount the disk, it's no longer needed. 1022 fc.umountResource(driveID) 1023 // use previous raw file created at createDiskPool, that way 1024 // the resource is released by firecracker and it can be destroyed in the host 1025 path = filepath.Join(fc.jailerRoot, driveID) 1026 } 1027 1028 return nil, fc.fcUpdateBlockDrive(path, driveID) 1029 } 1030 1031 // hotplugAddDevice supported in Firecracker VMM 1032 func (fc *firecracker) hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error) { 1033 span, _ := fc.trace("hotplugAddDevice") 1034 defer span.Finish() 1035 1036 switch devType { 1037 case blockDev: 1038 return fc.hotplugBlockDevice(*devInfo.(*config.BlockDrive), addDevice) 1039 default: 1040 fc.Logger().WithFields(logrus.Fields{"devInfo": devInfo, 1041 "deviceType": devType}).Warn("hotplugAddDevice: unsupported device") 1042 return nil, fmt.Errorf("Could not hot add device: unsupported device: %v, type: %v", 1043 devInfo, devType) 1044 } 1045 } 1046 1047 // hotplugRemoveDevice supported in Firecracker VMM 1048 func (fc *firecracker) hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error) { 1049 span, _ := fc.trace("hotplugRemoveDevice") 1050 defer span.Finish() 1051 1052 switch devType { 1053 case blockDev: 1054 return fc.hotplugBlockDevice(*devInfo.(*config.BlockDrive), removeDevice) 1055 default: 1056 fc.Logger().WithFields(logrus.Fields{"devInfo": devInfo, 1057 "deviceType": devType}).Error("hotplugRemoveDevice: unsupported device") 1058 return nil, fmt.Errorf("Could not hot remove device: unsupported device: %v, type: %v", 1059 devInfo, devType) 1060 } 1061 } 1062 1063 // getSandboxConsole builds the path of the console where we can read 1064 // logs coming from the sandbox. 1065 func (fc *firecracker) getSandboxConsole(id string) (string, error) { 1066 return fmt.Sprintf("%s://%s:%d", kataclient.HybridVSockScheme, filepath.Join(fc.jailerRoot, defaultHybridVSocketName), vSockLogsPort), nil 1067 } 1068 1069 func (fc *firecracker) disconnect() { 1070 fc.state.set(notReady) 1071 } 1072 1073 // Adds all capabilities supported by firecracker implementation of hypervisor interface 1074 func (fc *firecracker) capabilities() types.Capabilities { 1075 span, _ := fc.trace("capabilities") 1076 defer span.Finish() 1077 var caps types.Capabilities 1078 caps.SetBlockDeviceHotplugSupport() 1079 1080 return caps 1081 } 1082 1083 func (fc *firecracker) hypervisorConfig() HypervisorConfig { 1084 return fc.config 1085 } 1086 1087 func (fc *firecracker) resizeMemory(reqMemMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, memoryDevice, error) { 1088 return 0, memoryDevice{}, nil 1089 } 1090 1091 func (fc *firecracker) resizeVCPUs(reqVCPUs uint32) (currentVCPUs uint32, newVCPUs uint32, err error) { 1092 return 0, 0, nil 1093 } 1094 1095 // This is used to apply cgroup information on the host. 1096 // 1097 // As suggested by https://github.com/firecracker-microvm/firecracker/issues/718, 1098 // let's use `ps -T -p <pid>` to get fc vcpu info. 1099 func (fc *firecracker) getThreadIDs() (vcpuThreadIDs, error) { 1100 var vcpuInfo vcpuThreadIDs 1101 1102 vcpuInfo.vcpus = make(map[int]int) 1103 parent, err := utils.NewProc(fc.info.PID) 1104 if err != nil { 1105 return vcpuInfo, err 1106 } 1107 children, err := parent.Children() 1108 if err != nil { 1109 return vcpuInfo, err 1110 } 1111 for _, child := range children { 1112 comm, err := child.Comm() 1113 if err != nil { 1114 return vcpuInfo, errors.New("Invalid fc thread info") 1115 } 1116 if !strings.HasPrefix(comm, "fc_vcpu") { 1117 continue 1118 } 1119 cpus := strings.SplitAfter(comm, "fc_vcpu") 1120 if len(cpus) != 2 { 1121 return vcpuInfo, errors.Errorf("Invalid fc thread info: %v", comm) 1122 } 1123 cpuID, err := strconv.ParseInt(cpus[1], 10, 32) 1124 if err != nil { 1125 return vcpuInfo, errors.Wrapf(err, "Invalid fc thread info: %v", comm) 1126 } 1127 vcpuInfo.vcpus[int(cpuID)] = child.PID 1128 } 1129 1130 return vcpuInfo, nil 1131 } 1132 1133 func (fc *firecracker) cleanup() error { 1134 fc.cleanupJail() 1135 return nil 1136 } 1137 1138 func (fc *firecracker) getPids() []int { 1139 return []int{fc.info.PID} 1140 } 1141 1142 func (fc *firecracker) fromGrpc(ctx context.Context, hypervisorConfig *HypervisorConfig, j []byte) error { 1143 return errors.New("firecracker is not supported by VM cache") 1144 } 1145 1146 func (fc *firecracker) toGrpc() ([]byte, error) { 1147 return nil, errors.New("firecracker is not supported by VM cache") 1148 } 1149 1150 func (fc *firecracker) save() (s persistapi.HypervisorState) { 1151 s.Pid = fc.info.PID 1152 s.Type = string(FirecrackerHypervisor) 1153 return 1154 } 1155 1156 func (fc *firecracker) load(s persistapi.HypervisorState) { 1157 fc.info.PID = s.Pid 1158 } 1159 1160 func (fc *firecracker) check() error { 1161 if err := syscall.Kill(fc.info.PID, syscall.Signal(0)); err != nil { 1162 return errors.Wrapf(err, "failed to ping fc process") 1163 } 1164 1165 return nil 1166 } 1167 1168 func (fc *firecracker) generateSocket(id string, useVsock bool) (interface{}, error) { 1169 if !useVsock { 1170 return nil, fmt.Errorf("Can't start firecracker: vsocks is disabled") 1171 } 1172 1173 fc.Logger().Debug("Using hybrid-vsock endpoint") 1174 udsPath := filepath.Join(fc.jailerRoot, defaultHybridVSocketName) 1175 1176 return types.HybridVSock{ 1177 UdsPath: udsPath, 1178 Port: uint32(vSockPort), 1179 }, nil 1180 } 1181 1182 func (fc *firecracker) watchConsole() (*os.File, error) { 1183 master, slave, err := console.NewPty() 1184 if err != nil { 1185 fc.Logger().WithField("Error create pseudo tty", err).Debug() 1186 return nil, err 1187 } 1188 1189 stdio, err := os.OpenFile(slave, syscall.O_RDWR, 0700) 1190 if err != nil { 1191 fc.Logger().WithError(err).Debugf("open pseudo tty %s", slave) 1192 return nil, err 1193 } 1194 1195 go func() { 1196 scanner := bufio.NewScanner(master) 1197 for scanner.Scan() { 1198 fc.Logger().WithFields(logrus.Fields{ 1199 "sandbox": fc.id, 1200 "vmconsole": scanner.Text(), 1201 }).Infof("reading guest console") 1202 } 1203 1204 if err := scanner.Err(); err != nil { 1205 if err == io.EOF { 1206 fc.Logger().Info("console watcher quits") 1207 } else { 1208 fc.Logger().WithError(err).Error("Failed to read guest console") 1209 } 1210 } 1211 }() 1212 1213 return stdio, nil 1214 }