github.com/kata-containers/runtime@v0.0.0-20210505125100-04f29832a923/virtcontainers/fc.go (about)

     1  // Copyright (c) 2018 Intel Corporation
     2  //
     3  // SPDX-License-Identifier: Apache-2.0
     4  //
     5  
     6  package virtcontainers
     7  
     8  import (
     9  	"bufio"
    10  	"context"
    11  	"encoding/json"
    12  	"fmt"
    13  	"io"
    14  	"io/ioutil"
    15  	"net"
    16  	"net/http"
    17  	"os"
    18  	"os/exec"
    19  	"path/filepath"
    20  	"strconv"
    21  	"strings"
    22  	"sync"
    23  	"syscall"
    24  	"time"
    25  
    26  	"github.com/containerd/fifo"
    27  	httptransport "github.com/go-openapi/runtime/client"
    28  	"github.com/go-openapi/strfmt"
    29  	kataclient "github.com/kata-containers/agent/protocols/client"
    30  	persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api"
    31  	"github.com/kata-containers/runtime/virtcontainers/pkg/firecracker/client"
    32  	models "github.com/kata-containers/runtime/virtcontainers/pkg/firecracker/client/models"
    33  	ops "github.com/kata-containers/runtime/virtcontainers/pkg/firecracker/client/operations"
    34  	"github.com/opencontainers/selinux/go-selinux/label"
    35  	opentracing "github.com/opentracing/opentracing-go"
    36  	"github.com/pkg/errors"
    37  	"github.com/sirupsen/logrus"
    38  
    39  	"github.com/blang/semver"
    40  	"github.com/containerd/console"
    41  	"github.com/kata-containers/runtime/virtcontainers/device/config"
    42  	"github.com/kata-containers/runtime/virtcontainers/types"
    43  	"github.com/kata-containers/runtime/virtcontainers/utils"
    44  )
    45  
// vmmState records how far the firecracker VMM has progressed through its
// startup sequence.
type vmmState uint8

const (
	// notReady is the zero value; createSandbox resets the state to it.
	notReady vmmState = iota
	// cfReady is set by fcInitConfiguration once the boot source, drives and
	// logger have been recorded in the pending configuration.
	cfReady
	// vmReady is set by startSandbox after fcInit has returned successfully.
	vmReady
)
    53  
const (
	// fcTimeout is the maximum amount of time in seconds to wait for the VMM to respond
	fcTimeout = 10
	// fcSocket is the file name of the firecracker API socket.
	fcSocket = "firecracker.socket"
	// Names of the files within the jailer root.
	// Having predefined names helps with cleanup.
	fcKernel = "vmlinux"
	fcInitrd = "initrd"
	fcRootfs = "rootfs"
	// fcStopSandboxTimeout is how long, in seconds, fcEnd waits after SIGTERM
	// before falling back to SIGKILL.
	fcStopSandboxTimeout = 15
	// fcDiskPoolSize is the number of block devices that can be attached to
	// the firecracker guest VM.
	// We attach a pool of placeholder drives before the guest has started, and
	// later patch those placeholders so they point at drives with actual
	// contents.
	fcDiskPoolSize           = 8
	defaultHybridVSocketName = "kata.hvsock"

	// This is the first usable vsock context ID. All the vsocks can use the same
	// ID, since it's only used in the guest.
	defaultGuestVSockCID = int64(0x3)

	// File names of the fifos used by the firecracker logging scheme.
	fcLogFifo     = "logs.fifo"
	fcMetricsFifo = "metrics.fifo"

	// defaultFcConfig is the file name of the JSON configuration that is
	// written before the VMM is launched with --config-file.
	defaultFcConfig = "fcConfig.json"
	// storagePathSuffix mirrors persist/fs/fs.go:storagePathSuffix
	storagePathSuffix = "vc"
)
    83  
// fcMinSupportedVersion is the minimum firecracker version supported;
// checkVersion rejects anything older.
var fcMinSupportedVersion = semver.MustParse("0.21.1")
    86  
// fcKernelParams holds the guest kernel command-line parameters that are
// always passed when booting under firecracker. fcInitConfiguration combines
// them with the parameters from the hypervisor configuration.
var fcKernelParams = []Param{
	// NOTE(review): the original comment here read "The boot source is the
	// first partition of the first block device added", which does not
	// describe any of the parameters below; it looks like a leftover.
	{"pci", "off"},
	{"reboot", "k"},
	{"panic", "1"},
	{"iommu", "off"},
	{"net.ifnames", "0"},
	{"random.trust_cpu", "on"},

	// Firecracker doesn't support ACPI
	// Fix kernel error "ACPI BIOS Error (bug)"
	{"acpi", "off"},
}
   100  
   101  func (s vmmState) String() string {
   102  	switch s {
   103  	case notReady:
   104  		return "FC not ready"
   105  	case cfReady:
   106  		return "FC configure ready"
   107  	case vmReady:
   108  		return "FC VM ready"
   109  	}
   110  
   111  	return ""
   112  }
   113  
// FirecrackerInfo contains information related to the hypervisor that we
// want to store on disk
type FirecrackerInfo struct {
	// PID is the process ID recorded by fcInit after the VMM command started;
	// fcEnd signals this PID to stop the VM.
	PID int
	// Version is the firecracker version string as parsed by getVersionNumber
	// (without the leading "v").
	Version string
}
   120  
// firecrackerState pairs the current vmmState with the lock that guards it.
type firecrackerState struct {
	sync.RWMutex
	state vmmState
}
   125  
   126  func (s *firecrackerState) set(state vmmState) {
   127  	s.Lock()
   128  	defer s.Unlock()
   129  
   130  	s.state = state
   131  }
   132  
// firecracker is a Hypervisor interface implementation for the firecracker VMM.
type firecracker struct {
	id            string // Unique ID per pod. Maps to the truncated sandbox id
	vmPath        string // All jailed VM assets need to be under this
	chrootBaseDir string // chroot base for the jailer
	jailerRoot    string // <vmPath>/root; resources are bind-mounted below it
	socketPath    string // Path of the firecracker API unix socket
	netNSPath     string // Passed to the jailer as --netns when non-empty
	uid           string // UID and GID to be used for the VMM
	gid           string

	info FirecrackerInfo

	firecrackerd *exec.Cmd           // Tracks the firecracker process itself
	connection   *client.Firecracker // Tracks the current active connection

	ctx            context.Context
	config         HypervisorConfig
	pendingDevices []firecrackerDevice // Devices to be added before the FC VM ready

	state    firecrackerState
	jailed   bool // Set to true if jailer is enabled
	stateful bool // Set to true if running with shimv2

	fcConfigPath string
	fcConfig     *types.FcConfig // Parameters configured before VM starts

	// hotplugDriveOffset is the number of drives already configured when the
	// placeholder disk pool is created (see fcInitConfiguration).
	hotplugDriveOffset int
}
   162  
// firecrackerDevice is a device description queued in
// firecracker.pendingDevices; fcInitConfiguration hands each entry to
// addDevice.
type firecrackerDevice struct {
	dev     interface{} // device payload, interpreted according to devType
	devType deviceType
}
   167  
   168  // Logger returns a logrus logger appropriate for logging firecracker  messages
   169  func (fc *firecracker) Logger() *logrus.Entry {
   170  	return virtLog.WithField("subsystem", "firecracker")
   171  }
   172  
   173  func (fc *firecracker) trace(name string) (opentracing.Span, context.Context) {
   174  	if fc.ctx == nil {
   175  		fc.Logger().WithField("type", "bug").Error("trace called before context set")
   176  		fc.ctx = context.Background()
   177  	}
   178  
   179  	span, ctx := opentracing.StartSpanFromContext(fc.ctx, name)
   180  
   181  	span.SetTag("subsystem", "hypervisor")
   182  	span.SetTag("type", "firecracker")
   183  
   184  	return span, ctx
   185  }
   186  
   187  //At some cases, when sandbox id is too long, it will incur error of overlong
   188  //firecracker API unix socket(fc.socketPath).
   189  //In Linux, sun_path could maximumly contains 108 bytes in size.
   190  //(http://man7.org/linux/man-pages/man7/unix.7.html)
   191  func (fc *firecracker) truncateID(id string) string {
   192  	if len(id) > 32 {
   193  		//truncate the id to only leave the size of UUID(128bit).
   194  		return id[:32]
   195  	}
   196  
   197  	return id
   198  }
   199  
   200  // For firecracker this call only sets the internal structure up.
   201  // The sandbox will be created and started through startSandbox().
   202  func (fc *firecracker) createSandbox(ctx context.Context, id string, networkNS NetworkNamespace, hypervisorConfig *HypervisorConfig, stateful bool) error {
   203  	fc.ctx = ctx
   204  
   205  	span, _ := fc.trace("createSandbox")
   206  	defer span.Finish()
   207  
   208  	//TODO: check validity of the hypervisor config provided
   209  	//https://github.com/kata-containers/runtime/issues/1065
   210  	fc.id = fc.truncateID(id)
   211  	fc.state.set(notReady)
   212  	fc.config = *hypervisorConfig
   213  	fc.stateful = stateful
   214  
   215  	// When running with jailer all resources need to be under
   216  	// a specific location and that location needs to have
   217  	// exec permission (i.e. should not be mounted noexec, e.g. /run, /var/run)
   218  	// Also unix domain socket names have a hard limit
   219  	// #define UNIX_PATH_MAX   108
   220  	// Keep it short and live within the jailer expected paths
   221  	// <chroot_base>/<exec_file_name>/<id>/
   222  	// Also jailer based on the id implicitly sets up cgroups under
   223  	// <cgroups_base>/<exec_file_name>/<id>/
   224  	hypervisorName := filepath.Base(hypervisorConfig.HypervisorPath)
   225  	//fs.RunStoragePath cannot be used as we need exec perms
   226  	fc.chrootBaseDir = filepath.Join("/run", storagePathSuffix)
   227  
   228  	fc.vmPath = filepath.Join(fc.chrootBaseDir, hypervisorName, fc.id)
   229  	fc.jailerRoot = filepath.Join(fc.vmPath, "root") // auto created by jailer
   230  
   231  	// Firecracker and jailer automatically creates default API socket under /run
   232  	// with the name of "firecracker.socket"
   233  	fc.socketPath = filepath.Join(fc.jailerRoot, "run", fcSocket)
   234  
   235  	// So we need to repopulate this at startSandbox where it is valid
   236  	fc.netNSPath = networkNS.NetNsPath
   237  
   238  	// Till we create lower privileged kata user run as root
   239  	// https://github.com/kata-containers/runtime/issues/1869
   240  	fc.uid = "0"
   241  	fc.gid = "0"
   242  
   243  	fc.fcConfig = &types.FcConfig{}
   244  	fc.fcConfigPath = filepath.Join(fc.vmPath, defaultFcConfig)
   245  	return nil
   246  }
   247  
   248  func (fc *firecracker) newFireClient() *client.Firecracker {
   249  	span, _ := fc.trace("newFireClient")
   250  	defer span.Finish()
   251  	httpClient := client.NewHTTPClient(strfmt.NewFormats())
   252  
   253  	socketTransport := &http.Transport{
   254  		DialContext: func(ctx context.Context, network, path string) (net.Conn, error) {
   255  			addr, err := net.ResolveUnixAddr("unix", fc.socketPath)
   256  			if err != nil {
   257  				return nil, err
   258  			}
   259  
   260  			return net.DialUnix("unix", nil, addr)
   261  		},
   262  	}
   263  
   264  	transport := httptransport.New(client.DefaultHost, client.DefaultBasePath, client.DefaultSchemes)
   265  	transport.SetLogger(fc.Logger())
   266  	transport.SetDebug(fc.Logger().Logger.Level == logrus.DebugLevel)
   267  	transport.Transport = socketTransport
   268  	httpClient.SetTransport(transport)
   269  
   270  	return httpClient
   271  }
   272  
   273  func (fc *firecracker) vmRunning() bool {
   274  	resp, err := fc.client().Operations.DescribeInstance(nil)
   275  	if err != nil {
   276  		fc.Logger().WithError(err).Error("getting vm status failed")
   277  		return false
   278  	}
   279  
   280  	// Be explicit
   281  	switch *resp.Payload.State {
   282  	case models.InstanceInfoStateStarting:
   283  		// Unsure what we should do here
   284  		fc.Logger().WithField("unexpected-state", models.InstanceInfoStateStarting).Debug("vmRunning")
   285  		return false
   286  	case models.InstanceInfoStateRunning:
   287  		return true
   288  	case models.InstanceInfoStateUninitialized:
   289  		return false
   290  	default:
   291  		return false
   292  	}
   293  }
   294  
   295  func (fc *firecracker) getVersionNumber() (string, error) {
   296  	args := []string{"--version"}
   297  	checkCMD := exec.Command(fc.config.HypervisorPath, args...)
   298  
   299  	data, err := checkCMD.Output()
   300  	if err != nil {
   301  		return "", fmt.Errorf("Running checking FC version command failed: %v", err)
   302  	}
   303  
   304  	var version string
   305  	fields := strings.Split(string(data), " ")
   306  	if len(fields) > 1 {
   307  		// The output format of `Firecracker --verion` is as follows
   308  		// Firecracker v0.21.1
   309  		version = strings.TrimPrefix(strings.TrimSpace(fields[1]), "v")
   310  		return version, nil
   311  	}
   312  
   313  	return "", errors.New("getting FC version failed, the output is malformed")
   314  }
   315  
   316  func (fc *firecracker) checkVersion(version string) error {
   317  	v, err := semver.Make(version)
   318  	if err != nil {
   319  		return fmt.Errorf("Malformed firecracker version: %v", err)
   320  	}
   321  
   322  	if v.LT(fcMinSupportedVersion) {
   323  		return fmt.Errorf("version %v is not supported. Minimum supported version of firecracker is %v", v.String(), fcMinSupportedVersion.String())
   324  	}
   325  
   326  	return nil
   327  }
   328  
   329  // waitVMMRunning will wait for timeout seconds for the VMM to be up and running.
   330  func (fc *firecracker) waitVMMRunning(timeout int) error {
   331  	span, _ := fc.trace("wait VMM to be running")
   332  	defer span.Finish()
   333  
   334  	if timeout < 0 {
   335  		return fmt.Errorf("Invalid timeout %ds", timeout)
   336  	}
   337  
   338  	timeStart := time.Now()
   339  	for {
   340  		if fc.vmRunning() {
   341  			return nil
   342  		}
   343  
   344  		if int(time.Since(timeStart).Seconds()) > timeout {
   345  			return fmt.Errorf("Failed to connect to firecrackerinstance (timeout %ds)", timeout)
   346  		}
   347  
   348  		time.Sleep(time.Duration(10) * time.Millisecond)
   349  	}
   350  }
   351  
// fcInit checks the firecracker version, launches the VMM (through the
// jailer when fc.jailed is set) with the previously written configuration
// file, and waits up to timeout seconds for the instance to report that it is
// running.
//
// On success fc.info.PID, fc.firecrackerd and fc.connection refer to the
// started process. Any error from the version check, the config bind mount,
// the console setup, the process start or the wait is returned as is.
func (fc *firecracker) fcInit(timeout int) error {
	span, _ := fc.trace("fcInit")
	defer span.Finish()

	var err error
	//FC version set and check
	if fc.info.Version, err = fc.getVersionNumber(); err != nil {
		return err
	}

	if err := fc.checkVersion(fc.info.Version); err != nil {
		return err
	}

	var cmd *exec.Cmd
	var args []string

	// Bind-mount the config file below the jailer root. From here on
	// fc.fcConfigPath holds the path fcJailResource returned (relative to the
	// jail when jailed) rather than the original host path.
	if fc.fcConfigPath, err = fc.fcJailResource(fc.fcConfigPath, defaultFcConfig); err != nil {
		return err
	}

	// Daemonize only for stateful (shimv2) sandboxes that are not in debug
	// mode; in debug mode the output is attached to the console below.
	if !fc.config.Debug && fc.stateful {
		args = append(args, "--daemonize")
	}

	//https://github.com/firecracker-microvm/firecracker/blob/master/docs/jailer.md#jailer-usage
	//--seccomp-level specifies whether seccomp filters should be installed and how restrictive they should be. Possible values are:
	//0 : disabled.
	//1 : basic filtering. This prohibits syscalls not whitelisted by Firecracker.
	//2 (default): advanced filtering. This adds further checks on some of the parameters of the allowed syscalls.
	// Note: no --seccomp-level argument is passed below.
	if fc.jailed {
		jailedArgs := []string{
			"--id", fc.id,
			"--node", "0", //FIXME: Comprehend NUMA topology or explicit ignore
			"--exec-file", fc.config.HypervisorPath,
			"--uid", "0", //https://github.com/kata-containers/runtime/issues/1869
			"--gid", "0",
			"--chroot-base-dir", fc.chrootBaseDir,
		}
		args = append(args, jailedArgs...)
		if fc.netNSPath != "" {
			args = append(args, "--netns", fc.netNSPath)
		}
		// Everything after "--" is placed after the jailer's own arguments.
		args = append(args, "--", "--config-file", fc.fcConfigPath)

		cmd = exec.Command(fc.config.JailerPath, args...)
	} else {
		args = append(args,
			"--api-sock", fc.socketPath,
			"--config-file", fc.fcConfigPath)
		cmd = exec.Command(fc.config.HypervisorPath, args...)
	}

	// In debug mode for stateful sandboxes, route the process's stdout and
	// stderr to whatever watchConsole returns.
	if fc.config.Debug && fc.stateful {
		stdin, err := fc.watchConsole()
		if err != nil {
			return err
		}

		cmd.Stderr = stdin
		cmd.Stdout = stdin
	}

	fc.Logger().WithField("hypervisor args", args).Debug()
	fc.Logger().WithField("hypervisor cmd", cmd).Debug()

	fc.Logger().Info("Starting VM")
	if err := cmd.Start(); err != nil {
		fc.Logger().WithField("Error starting firecracker", err).Debug()
		return err
	}

	// Record the started process and open a fresh API client for it.
	fc.info.PID = cmd.Process.Pid
	fc.firecrackerd = cmd
	fc.connection = fc.newFireClient()

	if err := fc.waitVMMRunning(timeout); err != nil {
		fc.Logger().WithField("fcInit failed:", err).Debug()
		return err
	}
	return nil
}
   434  
   435  func (fc *firecracker) fcEnd() (err error) {
   436  	span, _ := fc.trace("fcEnd")
   437  	defer span.Finish()
   438  
   439  	fc.Logger().Info("Stopping firecracker VM")
   440  
   441  	defer func() {
   442  		if err != nil {
   443  			fc.Logger().Info("fcEnd failed")
   444  		} else {
   445  			fc.Logger().Info("Firecracker VM stopped")
   446  		}
   447  	}()
   448  
   449  	pid := fc.info.PID
   450  
   451  	// Send a SIGTERM to the VM process to try to stop it properly
   452  	if err = syscall.Kill(pid, syscall.SIGTERM); err != nil {
   453  		if err == syscall.ESRCH {
   454  			return nil
   455  		}
   456  		return err
   457  	}
   458  
   459  	// Wait for the VM process to terminate
   460  	tInit := time.Now()
   461  	for {
   462  		if err = syscall.Kill(pid, syscall.Signal(0)); err != nil {
   463  			return nil
   464  		}
   465  
   466  		if time.Since(tInit).Seconds() >= fcStopSandboxTimeout {
   467  			fc.Logger().Warnf("VM still running after waiting %ds", fcStopSandboxTimeout)
   468  			break
   469  		}
   470  
   471  		// Let's avoid to run a too busy loop
   472  		time.Sleep(time.Duration(50) * time.Millisecond)
   473  	}
   474  
   475  	// Let's try with a hammer now, a SIGKILL should get rid of the
   476  	// VM process.
   477  	return syscall.Kill(pid, syscall.SIGKILL)
   478  }
   479  
   480  func (fc *firecracker) client() *client.Firecracker {
   481  	span, _ := fc.trace("client")
   482  	defer span.Finish()
   483  
   484  	if fc.connection == nil {
   485  		fc.connection = fc.newFireClient()
   486  	}
   487  
   488  	return fc.connection
   489  }
   490  
   491  func (fc *firecracker) createJailedDrive(name string) (string, error) {
   492  	// Don't bind mount the resource, just create a raw file
   493  	// that can be bind-mounted later
   494  	r := filepath.Join(fc.jailerRoot, name)
   495  	f, err := os.Create(r)
   496  	if err != nil {
   497  		return "", err
   498  	}
   499  	f.Close()
   500  
   501  	if fc.jailed {
   502  		// use path relative to the jail
   503  		r = filepath.Join("/", name)
   504  	}
   505  
   506  	return r, nil
   507  }
   508  
   509  // when running with jailer, firecracker binary will firstly be copied into fc.jailerRoot,
   510  // and then being executed there. Therefore we need to ensure fc.JailerRoot has exec permissions.
   511  func (fc *firecracker) fcRemountJailerRootWithExec() error {
   512  	if err := bindMount(context.Background(), fc.jailerRoot, fc.jailerRoot, false, "shared"); err != nil {
   513  		fc.Logger().WithField("JailerRoot", fc.jailerRoot).Errorf("bindMount failed: %v", err)
   514  		return err
   515  	}
   516  
   517  	// /run is normally mounted with rw, nosuid(MS_NOSUID), relatime(MS_RELATIME), noexec(MS_NOEXEC).
   518  	// we re-mount jailerRoot to deliberately leave out MS_NOEXEC.
   519  	if err := remount(context.Background(), syscall.MS_NOSUID|syscall.MS_RELATIME, fc.jailerRoot); err != nil {
   520  		fc.Logger().WithField("JailerRoot", fc.jailerRoot).Errorf("Re-mount failed: %v", err)
   521  		return err
   522  	}
   523  
   524  	return nil
   525  }
   526  
   527  func (fc *firecracker) fcJailResource(src, dst string) (string, error) {
   528  	if src == "" || dst == "" {
   529  		return "", fmt.Errorf("fcJailResource: invalid jail locations: src:%v, dst:%v",
   530  			src, dst)
   531  	}
   532  	jailedLocation := filepath.Join(fc.jailerRoot, dst)
   533  	if err := bindMount(context.Background(), src, jailedLocation, false, "slave"); err != nil {
   534  		fc.Logger().WithField("bindMount failed", err).Error()
   535  		return "", err
   536  	}
   537  
   538  	if !fc.jailed {
   539  		return jailedLocation, nil
   540  	}
   541  
   542  	// This is the path within the jailed root
   543  	absPath := filepath.Join("/", dst)
   544  	return absPath, nil
   545  }
   546  
   547  func (fc *firecracker) fcSetBootSource(kernelPath, initrdPath, params string) error {
   548  	span, _ := fc.trace("fcSetBootSource")
   549  	defer span.Finish()
   550  	fc.Logger().WithFields(logrus.Fields{
   551  		"kernel-path":   kernelPath,
   552  		"initrd-path":   initrdPath,
   553  		"kernel-params": params,
   554  	}).Debug("fcSetBootSource")
   555  
   556  	kernelPath, err := fc.fcJailResource(kernelPath, fcKernel)
   557  	if err != nil {
   558  		return err
   559  	}
   560  
   561  	if initrdPath != "" {
   562  		initrdPath, err = fc.fcJailResource(initrdPath, fcInitrd)
   563  		if err != nil {
   564  			return err
   565  		}
   566  	}
   567  
   568  	src := &models.BootSource{
   569  		KernelImagePath: &kernelPath,
   570  		BootArgs:        params,
   571  		InitrdPath:      initrdPath,
   572  	}
   573  
   574  	fc.fcConfig.BootSource = src
   575  
   576  	return nil
   577  }
   578  
   579  func (fc *firecracker) fcSetVMRootfs(path string) error {
   580  	span, _ := fc.trace("fcSetVMRootfs")
   581  	defer span.Finish()
   582  
   583  	jailedRootfs, err := fc.fcJailResource(path, fcRootfs)
   584  	if err != nil {
   585  		return err
   586  	}
   587  
   588  	driveID := "rootfs"
   589  	isReadOnly := true
   590  	//Add it as a regular block device
   591  	//This allows us to use a partitoned root block device
   592  	isRootDevice := false
   593  	// This is the path within the jailed root
   594  	drive := &models.Drive{
   595  		DriveID:      &driveID,
   596  		IsReadOnly:   &isReadOnly,
   597  		IsRootDevice: &isRootDevice,
   598  		PathOnHost:   &jailedRootfs,
   599  	}
   600  
   601  	fc.fcConfig.Drives = append(fc.fcConfig.Drives, drive)
   602  
   603  	return nil
   604  }
   605  
   606  func (fc *firecracker) fcSetVMBaseConfig(mem int64, vcpus int64, htEnabled bool) {
   607  	span, _ := fc.trace("fcSetVMBaseConfig")
   608  	defer span.Finish()
   609  	fc.Logger().WithFields(logrus.Fields{"mem": mem,
   610  		"vcpus":     vcpus,
   611  		"htEnabled": htEnabled}).Debug("fcSetVMBaseConfig")
   612  
   613  	cfg := &models.MachineConfiguration{
   614  		HtEnabled:  &htEnabled,
   615  		MemSizeMib: &mem,
   616  		VcpuCount:  &vcpus,
   617  	}
   618  
   619  	fc.fcConfig.MachineConfig = cfg
   620  }
   621  
   622  func (fc *firecracker) fcSetLogger() error {
   623  	span, _ := fc.trace("fcSetLogger")
   624  	defer span.Finish()
   625  
   626  	fcLogLevel := "Error"
   627  	if fc.config.Debug {
   628  		fcLogLevel = "Debug"
   629  	}
   630  
   631  	// listen to log fifo file and transfer error info
   632  	jailedLogFifo, err := fc.fcListenToFifo(fcLogFifo)
   633  	if err != nil {
   634  		return fmt.Errorf("Failed setting log: %s", err)
   635  	}
   636  
   637  	// listen to metrics file and transfer error info
   638  	jailedMetricsFifo, err := fc.fcListenToFifo(fcMetricsFifo)
   639  	if err != nil {
   640  		return fmt.Errorf("Failed setting log: %s", err)
   641  	}
   642  
   643  	fc.fcConfig.Logger = &models.Logger{
   644  		Level:       &fcLogLevel,
   645  		LogFifo:     &jailedLogFifo,
   646  		MetricsFifo: &jailedMetricsFifo,
   647  	}
   648  
   649  	return err
   650  }
   651  
   652  func (fc *firecracker) fcListenToFifo(fifoName string) (string, error) {
   653  	fcFifoPath := filepath.Join(fc.vmPath, fifoName)
   654  	fcFifo, err := fifo.OpenFifo(context.Background(), fcFifoPath, syscall.O_CREAT|syscall.O_RDONLY|syscall.O_NONBLOCK, 0)
   655  	if err != nil {
   656  		return "", fmt.Errorf("Failed to open/create fifo file %s", err)
   657  	}
   658  
   659  	jailedFifoPath, err := fc.fcJailResource(fcFifoPath, fifoName)
   660  	if err != nil {
   661  		return "", err
   662  	}
   663  
   664  	go func() {
   665  		scanner := bufio.NewScanner(fcFifo)
   666  		for scanner.Scan() {
   667  			fc.Logger().WithFields(logrus.Fields{
   668  				"fifoName": fifoName,
   669  				"contents": scanner.Text()}).Error("firecracker failed")
   670  		}
   671  
   672  		if err := scanner.Err(); err != nil {
   673  			fc.Logger().WithError(err).Errorf("Failed reading firecracker fifo file")
   674  		}
   675  
   676  		if err := fcFifo.Close(); err != nil {
   677  			fc.Logger().WithError(err).Errorf("Failed closing firecracker fifo file")
   678  		}
   679  	}()
   680  
   681  	return jailedFifoPath, nil
   682  }
   683  
   684  func (fc *firecracker) fcInitConfiguration() error {
   685  	// Firecracker API socket(firecracker.socket) is automatically created
   686  	// under /run dir.
   687  	err := os.MkdirAll(filepath.Join(fc.jailerRoot, "run"), DirMode)
   688  	if err != nil {
   689  		return err
   690  	}
   691  	defer func() {
   692  		if err != nil {
   693  			if err := os.RemoveAll(fc.vmPath); err != nil {
   694  				fc.Logger().WithError(err).Error("Fail to clean up vm directory")
   695  			}
   696  		}
   697  	}()
   698  
   699  	if fc.config.JailerPath != "" {
   700  		fc.jailed = true
   701  		if err := fc.fcRemountJailerRootWithExec(); err != nil {
   702  			return err
   703  		}
   704  	}
   705  
   706  	fc.fcSetVMBaseConfig(int64(fc.config.MemorySize),
   707  		int64(fc.config.NumVCPUs), false)
   708  
   709  	kernelPath, err := fc.config.KernelAssetPath()
   710  	if err != nil {
   711  		return err
   712  	}
   713  
   714  	if fc.config.Debug && fc.stateful {
   715  		fcKernelParams = append(fcKernelParams, Param{"console", "ttyS0"})
   716  	} else {
   717  		fcKernelParams = append(fcKernelParams, []Param{
   718  			{"8250.nr_uarts", "0"},
   719  			// Tell agent where to send the logs
   720  			{"agent.log_vport", fmt.Sprintf("%d", vSockLogsPort)},
   721  		}...)
   722  	}
   723  
   724  	initrdPath, err := fc.config.InitrdAssetPath()
   725  	if err != nil {
   726  		return err
   727  	}
   728  
   729  	kernelParams := append(fc.config.KernelParams, fcKernelParams...)
   730  	if initrdPath == "" {
   731  		kernelParams = append(kernelParams, commonVirtioblkKernelRootParams...)
   732  	}
   733  	strParams := SerializeParams(kernelParams, "=")
   734  	formattedParams := strings.Join(strParams, " ")
   735  	if err := fc.fcSetBootSource(kernelPath, initrdPath, formattedParams); err != nil {
   736  		return err
   737  	}
   738  
   739  	if initrdPath == "" {
   740  		image, err := fc.config.ImageAssetPath()
   741  		if err != nil {
   742  			return err
   743  		}
   744  
   745  		if err := fc.fcSetVMRootfs(image); err != nil {
   746  			return err
   747  		}
   748  	}
   749  
   750  	fc.hotplugDriveOffset = len(fc.fcConfig.Drives)
   751  
   752  	if err := fc.createDiskPool(); err != nil {
   753  		return err
   754  	}
   755  
   756  	if err := fc.fcSetLogger(); err != nil {
   757  		return err
   758  	}
   759  
   760  	fc.state.set(cfReady)
   761  	for _, d := range fc.pendingDevices {
   762  		if err := fc.addDevice(d.dev, d.devType); err != nil {
   763  			return err
   764  		}
   765  	}
   766  
   767  	return nil
   768  }
   769  
// startSandbox will start the hypervisor for the given sandbox.
//
// It builds the configuration (fcInitConfiguration), writes it as JSON to
// fc.fcConfigPath, launches the VMM via fcInit, restricts the permissions of
// the hybrid vsock socket, and finally marks the state as vmReady.
//
// NOTE(review): the original comment said this starts the hypervisor "for
// configuration, but not yet start the actual virtual machine". fcInit is
// given the config file and waits until the instance reports running, so
// that statement looks outdated — confirm.
func (fc *firecracker) startSandbox(timeout int) error {
	span, _ := fc.trace("startSandbox")
	defer span.Finish()

	if err := fc.fcInitConfiguration(); err != nil {
		return err
	}

	data, errJSON := json.MarshalIndent(fc.fcConfig, "", "\t")
	if errJSON != nil {
		return errJSON
	}

	if err := ioutil.WriteFile(fc.fcConfigPath, data, 0640); err != nil {
		return err
	}

	// The deferred fcEnd only fires for errors assigned to this outer err,
	// i.e. a failure of fcInit or of the Chmod below. The SetProcessLabel
	// error is held in a shadowing variable and does not trigger it.
	var err error
	defer func() {
		if err != nil {
			fc.fcEnd()
		}
	}()

	// This needs to be done as late as possible, since all processes that
	// are executed by kata-runtime after this call, run with the SELinux
	// label. If these processes require privileged, we do not want to run
	// them under confinement.
	if err := label.SetProcessLabel(fc.config.SELinuxProcessLabel); err != nil {
		return err
	}
	// Reset the process label on return; the result of the reset is ignored.
	defer label.SetProcessLabel("")

	// Note: the package constant fcTimeout is used here, not the timeout
	// parameter.
	err = fc.fcInit(fcTimeout)
	if err != nil {
		return err
	}

	// make sure 'others' don't have access to this socket
	err = os.Chmod(filepath.Join(fc.jailerRoot, defaultHybridVSocketName), 0640)
	if err != nil {
		return fmt.Errorf("Could not change socket permissions: %v", err)
	}

	fc.state.set(vmReady)
	return nil
}
   820  
   821  func fcDriveIndexToID(i int) string {
   822  	return "drive_" + strconv.Itoa(i)
   823  }
   824  
   825  // Creates a disk pool to attach container virtio-block devices with
   826  // fcUpdateBlockDrive
   827  func (fc *firecracker) createDiskPool() error {
   828  	span, _ := fc.trace("createDiskPool")
   829  	defer span.Finish()
   830  
   831  	for i := 0; i < fcDiskPoolSize; i++ {
   832  		driveID := fcDriveIndexToID(i)
   833  		isReadOnly := false
   834  		isRootDevice := false
   835  
   836  		// Create a temporary file as a placeholder backend for the drive
   837  		jailedDrive, err := fc.createJailedDrive(driveID)
   838  		if err != nil {
   839  			return err
   840  		}
   841  
   842  		drive := &models.Drive{
   843  			DriveID:      &driveID,
   844  			IsReadOnly:   &isReadOnly,
   845  			IsRootDevice: &isRootDevice,
   846  			PathOnHost:   &jailedDrive,
   847  		}
   848  
   849  		fc.fcConfig.Drives = append(fc.fcConfig.Drives, drive)
   850  	}
   851  
   852  	return nil
   853  }
   854  
   855  func (fc *firecracker) umountResource(jailedPath string) {
   856  	hostPath := filepath.Join(fc.jailerRoot, jailedPath)
   857  	fc.Logger().WithField("resource", hostPath).Debug("Unmounting resource")
   858  	err := syscall.Unmount(hostPath, syscall.MNT_DETACH)
   859  	if err != nil {
   860  		fc.Logger().WithError(err).Error("Failed to umount resource")
   861  	}
   862  }
   863  
   864  // cleanup all jail artifacts
   865  func (fc *firecracker) cleanupJail() {
   866  	span, _ := fc.trace("cleanupJail")
   867  	defer span.Finish()
   868  
   869  	fc.umountResource(fcKernel)
   870  	fc.umountResource(fcInitrd)
   871  	fc.umountResource(fcRootfs)
   872  	fc.umountResource(fcLogFifo)
   873  	fc.umountResource(fcMetricsFifo)
   874  	fc.umountResource(defaultFcConfig)
   875  	// if running with jailer, we also need to umount fc.jailerRoot
   876  	if fc.config.JailerPath != "" {
   877  		if err := syscall.Unmount(fc.jailerRoot, syscall.MNT_DETACH); err != nil {
   878  			fc.Logger().WithField("JailerRoot", fc.jailerRoot).WithError(err).Error("Failed to umount")
   879  		}
   880  	}
   881  
   882  	fc.Logger().WithField("cleaningJail", fc.vmPath).Info()
   883  	if err := os.RemoveAll(fc.vmPath); err != nil {
   884  		fc.Logger().WithField("cleanupJail failed", err).Error()
   885  	}
   886  }
   887  
   888  // stopSandbox will stop the Sandbox's VM.
   889  func (fc *firecracker) stopSandbox() (err error) {
   890  	span, _ := fc.trace("stopSandbox")
   891  	defer span.Finish()
   892  
   893  	return fc.fcEnd()
   894  }
   895  
// pauseSandbox is a no-op in this implementation; it always returns nil.
func (fc *firecracker) pauseSandbox() error {
	return nil
}
   899  
// saveSandbox is a no-op in this implementation; it always returns nil.
func (fc *firecracker) saveSandbox() error {
	return nil
}
   903  
// resumeSandbox is a no-op in this implementation; it always returns nil.
func (fc *firecracker) resumeSandbox() error {
	return nil
}
   907  
   908  func (fc *firecracker) fcAddVsock(hvs types.HybridVSock) {
   909  	span, _ := fc.trace("fcAddVsock")
   910  	defer span.Finish()
   911  
   912  	udsPath := hvs.UdsPath
   913  	if fc.jailed {
   914  		udsPath = filepath.Join("/", defaultHybridVSocketName)
   915  	}
   916  
   917  	vsockID := "root"
   918  	ctxID := defaultGuestVSockCID
   919  	vsock := &models.Vsock{
   920  		GuestCid: &ctxID,
   921  		UdsPath:  &udsPath,
   922  		VsockID:  &vsockID,
   923  	}
   924  
   925  	fc.fcConfig.Vsock = vsock
   926  }
   927  
   928  func (fc *firecracker) fcAddNetDevice(endpoint Endpoint) {
   929  	span, _ := fc.trace("fcAddNetDevice")
   930  	defer span.Finish()
   931  
   932  	ifaceID := endpoint.Name()
   933  	ifaceCfg := &models.NetworkInterface{
   934  		AllowMmdsRequests: false,
   935  		GuestMac:          endpoint.HardwareAddr(),
   936  		IfaceID:           &ifaceID,
   937  		HostDevName:       &endpoint.NetworkPair().TapInterface.TAPIface.Name,
   938  	}
   939  
   940  	fc.fcConfig.NetworkInterfaces = append(fc.fcConfig.NetworkInterfaces, ifaceCfg)
   941  }
   942  
   943  func (fc *firecracker) fcAddBlockDrive(drive config.BlockDrive) error {
   944  	span, _ := fc.trace("fcAddBlockDrive")
   945  	defer span.Finish()
   946  
   947  	driveID := drive.ID
   948  	isReadOnly := false
   949  	isRootDevice := false
   950  
   951  	jailedDrive, err := fc.fcJailResource(drive.File, driveID)
   952  	if err != nil {
   953  		fc.Logger().WithField("fcAddBlockDrive failed", err).Error()
   954  		return err
   955  	}
   956  	driveFc := &models.Drive{
   957  		DriveID:      &driveID,
   958  		IsReadOnly:   &isReadOnly,
   959  		IsRootDevice: &isRootDevice,
   960  		PathOnHost:   &jailedDrive,
   961  	}
   962  
   963  	fc.fcConfig.Drives = append(fc.fcConfig.Drives, driveFc)
   964  
   965  	return nil
   966  }
   967  
   968  // Firecracker supports replacing the host drive used once the VM has booted up
   969  func (fc *firecracker) fcUpdateBlockDrive(path, id string) error {
   970  	span, _ := fc.trace("fcUpdateBlockDrive")
   971  	defer span.Finish()
   972  
   973  	// Use the global block index as an index into the pool of the devices
   974  	// created for firecracker.
   975  	driveParams := ops.NewPatchGuestDriveByIDParams()
   976  	driveParams.SetDriveID(id)
   977  
   978  	driveFc := &models.PartialDrive{
   979  		DriveID:    &id,
   980  		PathOnHost: &path, //This is the only property that can be modified
   981  	}
   982  
   983  	driveParams.SetBody(driveFc)
   984  	if _, err := fc.client().Operations.PatchGuestDriveByID(driveParams); err != nil {
   985  		return err
   986  	}
   987  
   988  	return nil
   989  }
   990  
   991  // addDevice will add extra devices to firecracker.  Limited to configure before the
   992  // virtual machine starts.  Devices include drivers and network interfaces only.
   993  func (fc *firecracker) addDevice(devInfo interface{}, devType deviceType) error {
   994  	span, _ := fc.trace("addDevice")
   995  	defer span.Finish()
   996  
   997  	fc.state.RLock()
   998  	defer fc.state.RUnlock()
   999  
  1000  	if fc.state.state == notReady {
  1001  		dev := firecrackerDevice{
  1002  			dev:     devInfo,
  1003  			devType: devType,
  1004  		}
  1005  		fc.Logger().Info("FC not ready, queueing device")
  1006  		fc.pendingDevices = append(fc.pendingDevices, dev)
  1007  		return nil
  1008  	}
  1009  
  1010  	var err error
  1011  	switch v := devInfo.(type) {
  1012  	case Endpoint:
  1013  		fc.Logger().WithField("device-type-endpoint", devInfo).Info("Adding device")
  1014  		fc.fcAddNetDevice(v)
  1015  	case config.BlockDrive:
  1016  		fc.Logger().WithField("device-type-blockdrive", devInfo).Info("Adding device")
  1017  		err = fc.fcAddBlockDrive(v)
  1018  	case types.HybridVSock:
  1019  		fc.Logger().WithField("device-type-hybrid-vsock", devInfo).Info("Adding device")
  1020  		fc.fcAddVsock(v)
  1021  	default:
  1022  		fc.Logger().WithField("unknown-device-type", devInfo).Error("Adding device")
  1023  	}
  1024  
  1025  	return err
  1026  }
  1027  
  1028  // hotplugBlockDevice supported in Firecracker VMM
  1029  // hot add or remove a block device.
  1030  func (fc *firecracker) hotplugBlockDevice(drive config.BlockDrive, op operation) (interface{}, error) {
  1031  	var path string
  1032  	var err error
  1033  	driveID := fcDriveIndexToID(drive.Index)
  1034  
  1035  	if op == addDevice {
  1036  		//The drive placeholder has to exist prior to Update
  1037  		path, err = fc.fcJailResource(drive.File, driveID)
  1038  		if err != nil {
  1039  			fc.Logger().WithError(err).WithField("resource", drive.File).Error("Could not jail resource")
  1040  			return nil, err
  1041  		}
  1042  	} else {
  1043  		// umount the disk, it's no longer needed.
  1044  		fc.umountResource(driveID)
  1045  		// use previous raw file created at createDiskPool, that way
  1046  		// the resource is released by firecracker and it can be destroyed in the host
  1047  		path = filepath.Join(fc.jailerRoot, driveID)
  1048  	}
  1049  
  1050  	return nil, fc.fcUpdateBlockDrive(path, driveID)
  1051  }
  1052  
  1053  // hotplugAddDevice supported in Firecracker VMM
  1054  func (fc *firecracker) hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error) {
  1055  	span, _ := fc.trace("hotplugAddDevice")
  1056  	defer span.Finish()
  1057  
  1058  	switch devType {
  1059  	case blockDev:
  1060  		return fc.hotplugBlockDevice(*devInfo.(*config.BlockDrive), addDevice)
  1061  	default:
  1062  		fc.Logger().WithFields(logrus.Fields{"devInfo": devInfo,
  1063  			"deviceType": devType}).Warn("hotplugAddDevice: unsupported device")
  1064  		return nil, fmt.Errorf("Could not hot add device: unsupported device: %v, type: %v",
  1065  			devInfo, devType)
  1066  	}
  1067  }
  1068  
  1069  // hotplugRemoveDevice supported in Firecracker VMM
  1070  func (fc *firecracker) hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error) {
  1071  	span, _ := fc.trace("hotplugRemoveDevice")
  1072  	defer span.Finish()
  1073  
  1074  	switch devType {
  1075  	case blockDev:
  1076  		return fc.hotplugBlockDevice(*devInfo.(*config.BlockDrive), removeDevice)
  1077  	default:
  1078  		fc.Logger().WithFields(logrus.Fields{"devInfo": devInfo,
  1079  			"deviceType": devType}).Error("hotplugRemoveDevice: unsupported device")
  1080  		return nil, fmt.Errorf("Could not hot remove device: unsupported device: %v, type: %v",
  1081  			devInfo, devType)
  1082  	}
  1083  }
  1084  
  1085  // getSandboxConsole builds the path of the console where we can read
  1086  // logs coming from the sandbox.
  1087  func (fc *firecracker) getSandboxConsole(id string) (string, error) {
  1088  	return fmt.Sprintf("%s://%s:%d", kataclient.HybridVSockScheme, filepath.Join(fc.jailerRoot, defaultHybridVSocketName), vSockLogsPort), nil
  1089  }
  1090  
// disconnect sets the tracked VMM state back to notReady; it does nothing
// else.
func (fc *firecracker) disconnect() {
	fc.state.set(notReady)
}
  1094  
// capabilities returns the capabilities supported by the firecracker
// implementation of the hypervisor interface. Block device hotplug support
// is the only capability set here.
func (fc *firecracker) capabilities() types.Capabilities {
	span, _ := fc.trace("capabilities")
	defer span.Finish()
	var caps types.Capabilities
	caps.SetBlockDeviceHotplugSupport()

	return caps
}
  1104  
// hypervisorConfig returns the hypervisor configuration held in fc.config.
// The configuration is returned by value.
func (fc *firecracker) hypervisorConfig() HypervisorConfig {
	return fc.config
}
  1108  
// resizeMemory is a stub in this implementation: all arguments are ignored
// and it always returns 0, an empty memoryDevice and a nil error.
func (fc *firecracker) resizeMemory(reqMemMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, memoryDevice, error) {
	return 0, memoryDevice{}, nil
}
  1112  
// resizeVCPUs is a stub in this implementation: reqVCPUs is ignored and it
// always returns 0 for both vCPU counts together with a nil error.
func (fc *firecracker) resizeVCPUs(reqVCPUs uint32) (currentVCPUs uint32, newVCPUs uint32, err error) {
	return 0, 0, nil
}
  1116  
  1117  // This is used to apply cgroup information on the host.
  1118  //
  1119  // As suggested by https://github.com/firecracker-microvm/firecracker/issues/718,
  1120  // let's use `ps -T -p <pid>` to get fc vcpu info.
  1121  func (fc *firecracker) getThreadIDs() (vcpuThreadIDs, error) {
  1122  	var vcpuInfo vcpuThreadIDs
  1123  
  1124  	vcpuInfo.vcpus = make(map[int]int)
  1125  	parent, err := utils.NewProc(fc.info.PID)
  1126  	if err != nil {
  1127  		return vcpuInfo, err
  1128  	}
  1129  	children, err := parent.Children()
  1130  	if err != nil {
  1131  		return vcpuInfo, err
  1132  	}
  1133  	for _, child := range children {
  1134  		comm, err := child.Comm()
  1135  		if err != nil {
  1136  			return vcpuInfo, errors.New("Invalid fc thread info")
  1137  		}
  1138  		if !strings.HasPrefix(comm, "fc_vcpu") {
  1139  			continue
  1140  		}
  1141  		cpus := strings.SplitAfter(comm, "fc_vcpu")
  1142  		if len(cpus) != 2 {
  1143  			return vcpuInfo, errors.Errorf("Invalid fc thread info: %v", comm)
  1144  		}
  1145  		cpuID, err := strconv.ParseInt(cpus[1], 10, 32)
  1146  		if err != nil {
  1147  			return vcpuInfo, errors.Wrapf(err, "Invalid fc thread info: %v", comm)
  1148  		}
  1149  		vcpuInfo.vcpus[int(cpuID)] = child.PID
  1150  	}
  1151  
  1152  	return vcpuInfo, nil
  1153  }
  1154  
// cleanup runs cleanupJail and always returns nil: cleanupJail has no return
// value, so nothing that goes wrong inside it is reported here.
func (fc *firecracker) cleanup() error {
	fc.cleanupJail()
	return nil
}
  1159  
// getPids returns a single-element slice holding fc.info.PID.
func (fc *firecracker) getPids() []int {
	return []int{fc.info.PID}
}
  1163  
// fromGrpc always fails: firecracker is not supported by VM cache. All
// arguments are ignored.
func (fc *firecracker) fromGrpc(ctx context.Context, hypervisorConfig *HypervisorConfig, j []byte) error {
	return errors.New("firecracker is not supported by VM cache")
}
  1167  
// toGrpc always fails: firecracker is not supported by VM cache. The
// returned byte slice is always nil.
func (fc *firecracker) toGrpc() ([]byte, error) {
	return nil, errors.New("firecracker is not supported by VM cache")
}
  1171  
  1172  func (fc *firecracker) save() (s persistapi.HypervisorState) {
  1173  	s.Pid = fc.info.PID
  1174  	s.Type = string(FirecrackerHypervisor)
  1175  	return
  1176  }
  1177  
// load restores fc.info.PID from the persisted hypervisor state. No other
// field of s is read.
func (fc *firecracker) load(s persistapi.HypervisorState) {
	fc.info.PID = s.Pid
}
  1181  
  1182  func (fc *firecracker) check() error {
  1183  	if err := syscall.Kill(fc.info.PID, syscall.Signal(0)); err != nil {
  1184  		return errors.Wrapf(err, "failed to ping fc process")
  1185  	}
  1186  
  1187  	return nil
  1188  }
  1189  
  1190  func (fc *firecracker) generateSocket(id string, useVsock bool) (interface{}, error) {
  1191  	if !useVsock {
  1192  		return nil, fmt.Errorf("Can't start firecracker: vsocks is disabled")
  1193  	}
  1194  
  1195  	fc.Logger().Debug("Using hybrid-vsock endpoint")
  1196  	udsPath := filepath.Join(fc.jailerRoot, defaultHybridVSocketName)
  1197  
  1198  	return types.HybridVSock{
  1199  		UdsPath: udsPath,
  1200  		Port:    uint32(vSockPort),
  1201  	}, nil
  1202  }
  1203  
  1204  func (fc *firecracker) watchConsole() (*os.File, error) {
  1205  	master, slave, err := console.NewPty()
  1206  	if err != nil {
  1207  		fc.Logger().WithField("Error create pseudo tty", err).Debug()
  1208  		return nil, err
  1209  	}
  1210  
  1211  	stdio, err := os.OpenFile(slave, syscall.O_RDWR, 0700)
  1212  	if err != nil {
  1213  		fc.Logger().WithError(err).Debugf("open pseudo tty %s", slave)
  1214  		return nil, err
  1215  	}
  1216  
  1217  	go func() {
  1218  		scanner := bufio.NewScanner(master)
  1219  		for scanner.Scan() {
  1220  			fc.Logger().WithFields(logrus.Fields{
  1221  				"sandbox":   fc.id,
  1222  				"vmconsole": scanner.Text(),
  1223  			}).Infof("reading guest console")
  1224  		}
  1225  
  1226  		if err := scanner.Err(); err != nil {
  1227  			if err == io.EOF {
  1228  				fc.Logger().Info("console watcher quits")
  1229  			} else {
  1230  				fc.Logger().WithError(err).Error("Failed to read guest console")
  1231  			}
  1232  		}
  1233  	}()
  1234  
  1235  	return stdio, nil
  1236  }
  1237  
// getVirtDriveOffset returns the value of fc.hotplugDriveOffset.
func (fc *firecracker) getVirtDriveOffset() int {
	return fc.hotplugDriveOffset
}