gitee.com/leisunstar/runtime@v0.0.0-20200521203717-5cef3e7b53f9/virtcontainers/fc.go (about)

     1  // Copyright (c) 2018 Intel Corporation
     2  //
     3  // SPDX-License-Identifier: Apache-2.0
     4  //
     5  
     6  package virtcontainers
     7  
     8  import (
     9  	"bufio"
    10  	"context"
    11  	"encoding/json"
    12  	"fmt"
    13  	"io"
    14  	"io/ioutil"
    15  	"net"
    16  	"net/http"
    17  	"os"
    18  	"os/exec"
    19  	"path/filepath"
    20  	"strconv"
    21  	"strings"
    22  	"sync"
    23  	"syscall"
    24  	"time"
    25  
    26  	"github.com/containerd/fifo"
    27  	httptransport "github.com/go-openapi/runtime/client"
    28  	"github.com/go-openapi/strfmt"
    29  	kataclient "github.com/kata-containers/agent/protocols/client"
    30  	persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api"
    31  	"github.com/kata-containers/runtime/virtcontainers/pkg/firecracker/client"
    32  	models "github.com/kata-containers/runtime/virtcontainers/pkg/firecracker/client/models"
    33  	ops "github.com/kata-containers/runtime/virtcontainers/pkg/firecracker/client/operations"
    34  	"github.com/opencontainers/selinux/go-selinux/label"
    35  	opentracing "github.com/opentracing/opentracing-go"
    36  	"github.com/pkg/errors"
    37  	"github.com/sirupsen/logrus"
    38  
    39  	"github.com/blang/semver"
    40  	"github.com/containerd/console"
    41  	"github.com/kata-containers/runtime/virtcontainers/device/config"
    42  	"github.com/kata-containers/runtime/virtcontainers/types"
    43  	"github.com/kata-containers/runtime/virtcontainers/utils"
    44  )
    45  
// vmmState enumerates the stages of the firecracker VMM lifecycle tracked by
// this hypervisor implementation.
type vmmState uint8

const (
	// notReady is the zero value; createSandbox sets it explicitly.
	notReady vmmState = iota
	// cfReady is set by fcInitConfiguration once the VM configuration has
	// been assembled.
	cfReady
	// vmReady is set by startSandbox once the VMM process has been started.
	vmReady
)
    53  
const (
	// fcTimeout is the maximum amount of time in seconds to wait for the VMM to respond
	fcTimeout = 10
	fcSocket  = "firecracker.socket"
	// Names of the files within the jailer root.
	// Having predefined names helps with cleanup.
	fcKernel             = "vmlinux"
	fcRootfs             = "rootfs"
	// fcStopSandboxTimeout is the number of seconds fcEnd waits after SIGTERM
	// before falling back to SIGKILL.
	fcStopSandboxTimeout = 15
	// This indicates the number of block devices that can be attached to the
	// firecracker guest VM.
	// We attach a pool of placeholder drives before the guest has started, and
	// later patch the placeholder drives to point at drives with actual contents.
	fcDiskPoolSize           = 8
	defaultHybridVSocketName = "kata.hvsock"

	// This is the first usable vsock context ID. All the vsocks can use the same
	// ID, since it's only used in the guest.
	defaultGuestVSockCID = int64(0x3)

	// These are related to the firecracker logging scheme: names of the fifos
	// used for logs and metrics.
	fcLogFifo     = "logs.fifo"
	fcMetricsFifo = "metrics.fifo"

	// defaultFcConfig is the file name of the JSON configuration handed to
	// firecracker through --config-file.
	defaultFcConfig = "fcConfig.json"
	// storagePathSuffix mirrors persist/fs/fs.go:storagePathSuffix
	storagePathSuffix = "vc"
)
    82  
// fcMinSupportedVersion is the minimum version of firecracker supported.
// checkVersion rejects any version lower than this one.
var fcMinSupportedVersion = semver.MustParse("0.21.1")
    85  
// fcKernelParams holds the guest kernel command-line parameters used with
// firecracker: commonVirtioblkKernelRootParams followed by the
// firecracker-specific entries below.
var fcKernelParams = append(commonVirtioblkKernelRootParams, []Param{
	// The boot source is the first partition of the first block device added
	{"pci", "off"},
	{"reboot", "k"},
	{"panic", "1"},
	{"iommu", "off"},
	{"net.ifnames", "0"},
	{"random.trust_cpu", "on"},

	// Firecracker doesn't support ACPI
	// Fix kernel error "ACPI BIOS Error (bug)"
	{"acpi", "off"},
}...)
    99  
   100  func (s vmmState) String() string {
   101  	switch s {
   102  	case notReady:
   103  		return "FC not ready"
   104  	case cfReady:
   105  		return "FC configure ready"
   106  	case vmReady:
   107  		return "FC VM ready"
   108  	}
   109  
   110  	return ""
   111  }
   112  
// FirecrackerInfo contains information related to the hypervisor that we
// want to store on disk
type FirecrackerInfo struct {
	// PID is the process ID recorded by fcInit right after the VMM (or
	// jailer) command has been started.
	PID int
	// Version is the firecracker version string reported by the binary,
	// without the leading "v".
	Version string
}
   119  
// firecrackerState holds the current vmmState together with the embedded
// RWMutex that set() takes while updating it.
type firecrackerState struct {
	sync.RWMutex
	state vmmState
}
   124  
   125  func (s *firecrackerState) set(state vmmState) {
   126  	s.Lock()
   127  	defer s.Unlock()
   128  
   129  	s.state = state
   130  }
   131  
// firecracker is a Hypervisor interface implementation for the firecracker VMM.
type firecracker struct {
	id            string //Unique ID per pod. Normally maps to the sandbox id
	vmPath        string //All jailed VM assets need to be under this
	chrootBaseDir string //chroot base for the jailer
	jailerRoot    string //<vmPath>/root; jailed resources are bind-mounted below it
	socketPath    string //Path of the firecracker API unix socket
	netNSPath     string //Network namespace path, passed to the jailer via --netns when set
	uid           string //UID and GID to be used for the VMM
	gid           string

	info FirecrackerInfo

	firecrackerd *exec.Cmd           //Tracks the firecracker process itself
	connection   *client.Firecracker //Tracks the current active connection

	ctx            context.Context     // Parent context used by trace() to start spans
	config         HypervisorConfig    // Copy of the hypervisor configuration given to createSandbox
	pendingDevices []firecrackerDevice // Devices to be added before the FC VM ready

	state    firecrackerState
	jailed   bool //Set to true if jailer is enabled
	stateful bool //Set to true if running with shimv2

	fcConfigPath string          // Location of the JSON configuration file passed via --config-file
	fcConfig     *types.FcConfig // Parameters configured before VM starts
}
   159  
// firecrackerDevice pairs a device description with its deviceType so that it
// can be queued in pendingDevices and handed to addDevice later.
type firecrackerDevice struct {
	dev     interface{}
	devType deviceType
}
   164  
   165  // Logger returns a logrus logger appropriate for logging firecracker  messages
   166  func (fc *firecracker) Logger() *logrus.Entry {
   167  	return virtLog.WithField("subsystem", "firecracker")
   168  }
   169  
   170  func (fc *firecracker) trace(name string) (opentracing.Span, context.Context) {
   171  	if fc.ctx == nil {
   172  		fc.Logger().WithField("type", "bug").Error("trace called before context set")
   173  		fc.ctx = context.Background()
   174  	}
   175  
   176  	span, ctx := opentracing.StartSpanFromContext(fc.ctx, name)
   177  
   178  	span.SetTag("subsystem", "hypervisor")
   179  	span.SetTag("type", "firecracker")
   180  
   181  	return span, ctx
   182  }
   183  
   184  //At some cases, when sandbox id is too long, it will incur error of overlong
   185  //firecracker API unix socket(fc.socketPath).
   186  //In Linux, sun_path could maximumly contains 108 bytes in size.
   187  //(http://man7.org/linux/man-pages/man7/unix.7.html)
   188  func (fc *firecracker) truncateID(id string) string {
   189  	if len(id) > 32 {
   190  		//truncate the id to only leave the size of UUID(128bit).
   191  		return id[:32]
   192  	}
   193  
   194  	return id
   195  }
   196  
   197  // For firecracker this call only sets the internal structure up.
   198  // The sandbox will be created and started through startSandbox().
   199  func (fc *firecracker) createSandbox(ctx context.Context, id string, networkNS NetworkNamespace, hypervisorConfig *HypervisorConfig, stateful bool) error {
   200  	fc.ctx = ctx
   201  
   202  	span, _ := fc.trace("createSandbox")
   203  	defer span.Finish()
   204  
   205  	//TODO: check validity of the hypervisor config provided
   206  	//https://github.com/kata-containers/runtime/issues/1065
   207  	fc.id = fc.truncateID(id)
   208  	fc.state.set(notReady)
   209  	fc.config = *hypervisorConfig
   210  	fc.stateful = stateful
   211  
   212  	// When running with jailer all resources need to be under
   213  	// a specific location and that location needs to have
   214  	// exec permission (i.e. should not be mounted noexec, e.g. /run, /var/run)
   215  	// Also unix domain socket names have a hard limit
   216  	// #define UNIX_PATH_MAX   108
   217  	// Keep it short and live within the jailer expected paths
   218  	// <chroot_base>/<exec_file_name>/<id>/
   219  	// Also jailer based on the id implicitly sets up cgroups under
   220  	// <cgroups_base>/<exec_file_name>/<id>/
   221  	hypervisorName := filepath.Base(hypervisorConfig.HypervisorPath)
   222  	//fs.RunStoragePath cannot be used as we need exec perms
   223  	fc.chrootBaseDir = filepath.Join("/run", storagePathSuffix)
   224  
   225  	fc.vmPath = filepath.Join(fc.chrootBaseDir, hypervisorName, fc.id)
   226  	fc.jailerRoot = filepath.Join(fc.vmPath, "root") // auto created by jailer
   227  
   228  	// Firecracker and jailer automatically creates default API socket under /run
   229  	// with the name of "firecracker.socket"
   230  	fc.socketPath = filepath.Join(fc.jailerRoot, "run", fcSocket)
   231  
   232  	// So we need to repopulate this at startSandbox where it is valid
   233  	fc.netNSPath = networkNS.NetNsPath
   234  
   235  	// Till we create lower privileged kata user run as root
   236  	// https://github.com/kata-containers/runtime/issues/1869
   237  	fc.uid = "0"
   238  	fc.gid = "0"
   239  
   240  	fc.fcConfig = &types.FcConfig{}
   241  	fc.fcConfigPath = filepath.Join(fc.vmPath, defaultFcConfig)
   242  	return nil
   243  }
   244  
   245  func (fc *firecracker) newFireClient() *client.Firecracker {
   246  	span, _ := fc.trace("newFireClient")
   247  	defer span.Finish()
   248  	httpClient := client.NewHTTPClient(strfmt.NewFormats())
   249  
   250  	socketTransport := &http.Transport{
   251  		DialContext: func(ctx context.Context, network, path string) (net.Conn, error) {
   252  			addr, err := net.ResolveUnixAddr("unix", fc.socketPath)
   253  			if err != nil {
   254  				return nil, err
   255  			}
   256  
   257  			return net.DialUnix("unix", nil, addr)
   258  		},
   259  	}
   260  
   261  	transport := httptransport.New(client.DefaultHost, client.DefaultBasePath, client.DefaultSchemes)
   262  	transport.SetLogger(fc.Logger())
   263  	transport.SetDebug(fc.Logger().Logger.Level == logrus.DebugLevel)
   264  	transport.Transport = socketTransport
   265  	httpClient.SetTransport(transport)
   266  
   267  	return httpClient
   268  }
   269  
   270  func (fc *firecracker) vmRunning() bool {
   271  	resp, err := fc.client().Operations.DescribeInstance(nil)
   272  	if err != nil {
   273  		fc.Logger().WithError(err).Error("getting vm status failed")
   274  		return false
   275  	}
   276  
   277  	// Be explicit
   278  	switch *resp.Payload.State {
   279  	case models.InstanceInfoStateStarting:
   280  		// Unsure what we should do here
   281  		fc.Logger().WithField("unexpected-state", models.InstanceInfoStateStarting).Debug("vmRunning")
   282  		return false
   283  	case models.InstanceInfoStateRunning:
   284  		return true
   285  	case models.InstanceInfoStateUninitialized:
   286  		return false
   287  	default:
   288  		return false
   289  	}
   290  }
   291  
   292  func (fc *firecracker) getVersionNumber() (string, error) {
   293  	args := []string{"--version"}
   294  	checkCMD := exec.Command(fc.config.HypervisorPath, args...)
   295  
   296  	data, err := checkCMD.Output()
   297  	if err != nil {
   298  		return "", fmt.Errorf("Running checking FC version command failed: %v", err)
   299  	}
   300  
   301  	var version string
   302  	fields := strings.Split(string(data), " ")
   303  	if len(fields) > 1 {
   304  		// The output format of `Firecracker --verion` is as follows
   305  		// Firecracker v0.21.1
   306  		version = strings.TrimPrefix(strings.TrimSpace(fields[1]), "v")
   307  		return version, nil
   308  	}
   309  
   310  	return "", errors.New("getting FC version failed, the output is malformed")
   311  }
   312  
   313  func (fc *firecracker) checkVersion(version string) error {
   314  	v, err := semver.Make(version)
   315  	if err != nil {
   316  		return fmt.Errorf("Malformed firecracker version: %v", err)
   317  	}
   318  
   319  	if v.LT(fcMinSupportedVersion) {
   320  		return fmt.Errorf("version %v is not supported. Minimum supported version of firecracker is %v", v.String(), fcMinSupportedVersion.String())
   321  	}
   322  
   323  	return nil
   324  }
   325  
   326  // waitVMMRunning will wait for timeout seconds for the VMM to be up and running.
   327  func (fc *firecracker) waitVMMRunning(timeout int) error {
   328  	span, _ := fc.trace("wait VMM to be running")
   329  	defer span.Finish()
   330  
   331  	if timeout < 0 {
   332  		return fmt.Errorf("Invalid timeout %ds", timeout)
   333  	}
   334  
   335  	timeStart := time.Now()
   336  	for {
   337  		if fc.vmRunning() {
   338  			return nil
   339  		}
   340  
   341  		if int(time.Since(timeStart).Seconds()) > timeout {
   342  			return fmt.Errorf("Failed to connect to firecrackerinstance (timeout %ds)", timeout)
   343  		}
   344  
   345  		time.Sleep(time.Duration(10) * time.Millisecond)
   346  	}
   347  }
   348  
   349  func (fc *firecracker) fcInit(timeout int) error {
   350  	span, _ := fc.trace("fcInit")
   351  	defer span.Finish()
   352  
   353  	var err error
   354  	//FC version set and check
   355  	if fc.info.Version, err = fc.getVersionNumber(); err != nil {
   356  		return err
   357  	}
   358  
   359  	if err := fc.checkVersion(fc.info.Version); err != nil {
   360  		return err
   361  	}
   362  
   363  	var cmd *exec.Cmd
   364  	var args []string
   365  
   366  	if fc.fcConfigPath, err = fc.fcJailResource(fc.fcConfigPath, defaultFcConfig); err != nil {
   367  		return err
   368  	}
   369  
   370  	if !fc.config.Debug && fc.stateful {
   371  		args = append(args, "--daemonize")
   372  	}
   373  
   374  	//https://github.com/firecracker-microvm/firecracker/blob/master/docs/jailer.md#jailer-usage
   375  	//--seccomp-level specifies whether seccomp filters should be installed and how restrictive they should be. Possible values are:
   376  	//0 : disabled.
   377  	//1 : basic filtering. This prohibits syscalls not whitelisted by Firecracker.
   378  	//2 (default): advanced filtering. This adds further checks on some of the parameters of the allowed syscalls.
   379  	if fc.jailed {
   380  		jailedArgs := []string{
   381  			"--id", fc.id,
   382  			"--node", "0", //FIXME: Comprehend NUMA topology or explicit ignore
   383  			"--exec-file", fc.config.HypervisorPath,
   384  			"--uid", "0", //https://github.com/kata-containers/runtime/issues/1869
   385  			"--gid", "0",
   386  			"--chroot-base-dir", fc.chrootBaseDir,
   387  		}
   388  		args = append(args, jailedArgs...)
   389  		if fc.netNSPath != "" {
   390  			args = append(args, "--netns", fc.netNSPath)
   391  		}
   392  		args = append(args, "--", "--config-file", fc.fcConfigPath)
   393  
   394  		cmd = exec.Command(fc.config.JailerPath, args...)
   395  	} else {
   396  		args = append(args,
   397  			"--api-sock", fc.socketPath,
   398  			"--config-file", fc.fcConfigPath)
   399  		cmd = exec.Command(fc.config.HypervisorPath, args...)
   400  	}
   401  
   402  	if fc.config.Debug && fc.stateful {
   403  		stdin, err := fc.watchConsole()
   404  		if err != nil {
   405  			return err
   406  		}
   407  
   408  		cmd.Stderr = stdin
   409  		cmd.Stdout = stdin
   410  	}
   411  
   412  	fc.Logger().WithField("hypervisor args", args).Debug()
   413  	fc.Logger().WithField("hypervisor cmd", cmd).Debug()
   414  
   415  	fc.Logger().Info("Starting VM")
   416  	if err := cmd.Start(); err != nil {
   417  		fc.Logger().WithField("Error starting firecracker", err).Debug()
   418  		return err
   419  	}
   420  
   421  	fc.info.PID = cmd.Process.Pid
   422  	fc.firecrackerd = cmd
   423  	fc.connection = fc.newFireClient()
   424  
   425  	if err := fc.waitVMMRunning(timeout); err != nil {
   426  		fc.Logger().WithField("fcInit failed:", err).Debug()
   427  		return err
   428  	}
   429  	return nil
   430  }
   431  
   432  func (fc *firecracker) fcEnd() (err error) {
   433  	span, _ := fc.trace("fcEnd")
   434  	defer span.Finish()
   435  
   436  	fc.Logger().Info("Stopping firecracker VM")
   437  
   438  	defer func() {
   439  		if err != nil {
   440  			fc.Logger().Info("fcEnd failed")
   441  		} else {
   442  			fc.Logger().Info("Firecracker VM stopped")
   443  		}
   444  	}()
   445  
   446  	pid := fc.info.PID
   447  
   448  	// Send a SIGTERM to the VM process to try to stop it properly
   449  	if err = syscall.Kill(pid, syscall.SIGTERM); err != nil {
   450  		if err == syscall.ESRCH {
   451  			return nil
   452  		}
   453  		return err
   454  	}
   455  
   456  	// Wait for the VM process to terminate
   457  	tInit := time.Now()
   458  	for {
   459  		if err = syscall.Kill(pid, syscall.Signal(0)); err != nil {
   460  			return nil
   461  		}
   462  
   463  		if time.Since(tInit).Seconds() >= fcStopSandboxTimeout {
   464  			fc.Logger().Warnf("VM still running after waiting %ds", fcStopSandboxTimeout)
   465  			break
   466  		}
   467  
   468  		// Let's avoid to run a too busy loop
   469  		time.Sleep(time.Duration(50) * time.Millisecond)
   470  	}
   471  
   472  	// Let's try with a hammer now, a SIGKILL should get rid of the
   473  	// VM process.
   474  	return syscall.Kill(pid, syscall.SIGKILL)
   475  }
   476  
   477  func (fc *firecracker) client() *client.Firecracker {
   478  	span, _ := fc.trace("client")
   479  	defer span.Finish()
   480  
   481  	if fc.connection == nil {
   482  		fc.connection = fc.newFireClient()
   483  	}
   484  
   485  	return fc.connection
   486  }
   487  
   488  func (fc *firecracker) createJailedDrive(name string) (string, error) {
   489  	// Don't bind mount the resource, just create a raw file
   490  	// that can be bind-mounted later
   491  	r := filepath.Join(fc.jailerRoot, name)
   492  	f, err := os.Create(r)
   493  	if err != nil {
   494  		return "", err
   495  	}
   496  	f.Close()
   497  
   498  	if fc.jailed {
   499  		// use path relative to the jail
   500  		r = filepath.Join("/", name)
   501  	}
   502  
   503  	return r, nil
   504  }
   505  
   506  // when running with jailer, firecracker binary will firstly be copied into fc.jailerRoot,
   507  // and then being executed there. Therefore we need to ensure fc.JailerRoot has exec permissions.
   508  func (fc *firecracker) fcRemountJailerRootWithExec() error {
   509  	if err := bindMount(context.Background(), fc.jailerRoot, fc.jailerRoot, false, "shared"); err != nil {
   510  		fc.Logger().WithField("JailerRoot", fc.jailerRoot).Errorf("bindMount failed: %v", err)
   511  		return err
   512  	}
   513  
   514  	// /run is normally mounted with rw, nosuid(MS_NOSUID), relatime(MS_RELATIME), noexec(MS_NOEXEC).
   515  	// we re-mount jailerRoot to deliberately leave out MS_NOEXEC.
   516  	if err := remount(context.Background(), syscall.MS_NOSUID|syscall.MS_RELATIME, fc.jailerRoot); err != nil {
   517  		fc.Logger().WithField("JailerRoot", fc.jailerRoot).Errorf("Re-mount failed: %v", err)
   518  		return err
   519  	}
   520  
   521  	return nil
   522  }
   523  
   524  func (fc *firecracker) fcJailResource(src, dst string) (string, error) {
   525  	if src == "" || dst == "" {
   526  		return "", fmt.Errorf("fcJailResource: invalid jail locations: src:%v, dst:%v",
   527  			src, dst)
   528  	}
   529  	jailedLocation := filepath.Join(fc.jailerRoot, dst)
   530  	if err := bindMount(context.Background(), src, jailedLocation, false, "slave"); err != nil {
   531  		fc.Logger().WithField("bindMount failed", err).Error()
   532  		return "", err
   533  	}
   534  
   535  	if !fc.jailed {
   536  		return jailedLocation, nil
   537  	}
   538  
   539  	// This is the path within the jailed root
   540  	absPath := filepath.Join("/", dst)
   541  	return absPath, nil
   542  }
   543  
   544  func (fc *firecracker) fcSetBootSource(path, params string) error {
   545  	span, _ := fc.trace("fcSetBootSource")
   546  	defer span.Finish()
   547  	fc.Logger().WithFields(logrus.Fields{"kernel-path": path,
   548  		"kernel-params": params}).Debug("fcSetBootSource")
   549  
   550  	kernelPath, err := fc.fcJailResource(path, fcKernel)
   551  	if err != nil {
   552  		return err
   553  	}
   554  
   555  	src := &models.BootSource{
   556  		KernelImagePath: &kernelPath,
   557  		BootArgs:        params,
   558  	}
   559  
   560  	fc.fcConfig.BootSource = src
   561  
   562  	return nil
   563  }
   564  
   565  func (fc *firecracker) fcSetVMRootfs(path string) error {
   566  	span, _ := fc.trace("fcSetVMRootfs")
   567  	defer span.Finish()
   568  
   569  	jailedRootfs, err := fc.fcJailResource(path, fcRootfs)
   570  	if err != nil {
   571  		return err
   572  	}
   573  
   574  	driveID := "rootfs"
   575  	isReadOnly := true
   576  	//Add it as a regular block device
   577  	//This allows us to use a partitoned root block device
   578  	isRootDevice := false
   579  	// This is the path within the jailed root
   580  	drive := &models.Drive{
   581  		DriveID:      &driveID,
   582  		IsReadOnly:   &isReadOnly,
   583  		IsRootDevice: &isRootDevice,
   584  		PathOnHost:   &jailedRootfs,
   585  	}
   586  
   587  	fc.fcConfig.Drives = append(fc.fcConfig.Drives, drive)
   588  
   589  	return nil
   590  }
   591  
   592  func (fc *firecracker) fcSetVMBaseConfig(mem int64, vcpus int64, htEnabled bool) {
   593  	span, _ := fc.trace("fcSetVMBaseConfig")
   594  	defer span.Finish()
   595  	fc.Logger().WithFields(logrus.Fields{"mem": mem,
   596  		"vcpus":     vcpus,
   597  		"htEnabled": htEnabled}).Debug("fcSetVMBaseConfig")
   598  
   599  	cfg := &models.MachineConfiguration{
   600  		HtEnabled:  &htEnabled,
   601  		MemSizeMib: &mem,
   602  		VcpuCount:  &vcpus,
   603  	}
   604  
   605  	fc.fcConfig.MachineConfig = cfg
   606  }
   607  
   608  func (fc *firecracker) fcSetLogger() error {
   609  	span, _ := fc.trace("fcSetLogger")
   610  	defer span.Finish()
   611  
   612  	fcLogLevel := "Error"
   613  	if fc.config.Debug {
   614  		fcLogLevel = "Debug"
   615  	}
   616  
   617  	// listen to log fifo file and transfer error info
   618  	jailedLogFifo, err := fc.fcListenToFifo(fcLogFifo)
   619  	if err != nil {
   620  		return fmt.Errorf("Failed setting log: %s", err)
   621  	}
   622  
   623  	// listen to metrics file and transfer error info
   624  	jailedMetricsFifo, err := fc.fcListenToFifo(fcMetricsFifo)
   625  	if err != nil {
   626  		return fmt.Errorf("Failed setting log: %s", err)
   627  	}
   628  
   629  	fc.fcConfig.Logger = &models.Logger{
   630  		Level:       &fcLogLevel,
   631  		LogFifo:     &jailedLogFifo,
   632  		MetricsFifo: &jailedMetricsFifo,
   633  	}
   634  
   635  	return err
   636  }
   637  
   638  func (fc *firecracker) fcListenToFifo(fifoName string) (string, error) {
   639  	fcFifoPath := filepath.Join(fc.vmPath, fifoName)
   640  	fcFifo, err := fifo.OpenFifo(context.Background(), fcFifoPath, syscall.O_CREAT|syscall.O_RDONLY|syscall.O_NONBLOCK, 0)
   641  	if err != nil {
   642  		return "", fmt.Errorf("Failed to open/create fifo file %s", err)
   643  	}
   644  
   645  	jailedFifoPath, err := fc.fcJailResource(fcFifoPath, fifoName)
   646  	if err != nil {
   647  		return "", err
   648  	}
   649  
   650  	go func() {
   651  		scanner := bufio.NewScanner(fcFifo)
   652  		for scanner.Scan() {
   653  			fc.Logger().WithFields(logrus.Fields{
   654  				"fifoName": fifoName,
   655  				"contents": scanner.Text()}).Error("firecracker failed")
   656  		}
   657  
   658  		if err := scanner.Err(); err != nil {
   659  			fc.Logger().WithError(err).Errorf("Failed reading firecracker fifo file")
   660  		}
   661  
   662  		if err := fcFifo.Close(); err != nil {
   663  			fc.Logger().WithError(err).Errorf("Failed closing firecracker fifo file")
   664  		}
   665  	}()
   666  
   667  	return jailedFifoPath, nil
   668  }
   669  
   670  func (fc *firecracker) fcInitConfiguration() error {
   671  	// Firecracker API socket(firecracker.socket) is automatically created
   672  	// under /run dir.
   673  	err := os.MkdirAll(filepath.Join(fc.jailerRoot, "run"), DirMode)
   674  	if err != nil {
   675  		return err
   676  	}
   677  	defer func() {
   678  		if err != nil {
   679  			if err := os.RemoveAll(fc.vmPath); err != nil {
   680  				fc.Logger().WithError(err).Error("Fail to clean up vm directory")
   681  			}
   682  		}
   683  	}()
   684  
   685  	if fc.config.JailerPath != "" {
   686  		fc.jailed = true
   687  		if err := fc.fcRemountJailerRootWithExec(); err != nil {
   688  			return err
   689  		}
   690  	}
   691  
   692  	fc.fcSetVMBaseConfig(int64(fc.config.MemorySize),
   693  		int64(fc.config.NumVCPUs), false)
   694  
   695  	kernelPath, err := fc.config.KernelAssetPath()
   696  	if err != nil {
   697  		return err
   698  	}
   699  
   700  	if fc.config.Debug && fc.stateful {
   701  		fcKernelParams = append(fcKernelParams, Param{"console", "ttyS0"})
   702  	} else {
   703  		fcKernelParams = append(fcKernelParams, []Param{
   704  			{"8250.nr_uarts", "0"},
   705  			// Tell agent where to send the logs
   706  			{"agent.log_vport", fmt.Sprintf("%d", vSockLogsPort)},
   707  		}...)
   708  	}
   709  
   710  	kernelParams := append(fc.config.KernelParams, fcKernelParams...)
   711  	strParams := SerializeParams(kernelParams, "=")
   712  	formattedParams := strings.Join(strParams, " ")
   713  	if err := fc.fcSetBootSource(kernelPath, formattedParams); err != nil {
   714  		return err
   715  	}
   716  
   717  	image, err := fc.config.InitrdAssetPath()
   718  	if err != nil {
   719  		return err
   720  	}
   721  
   722  	if image == "" {
   723  		image, err = fc.config.ImageAssetPath()
   724  		if err != nil {
   725  			return err
   726  		}
   727  	}
   728  
   729  	if err := fc.fcSetVMRootfs(image); err != nil {
   730  		return err
   731  	}
   732  
   733  	if err := fc.createDiskPool(); err != nil {
   734  		return err
   735  	}
   736  
   737  	if err := fc.fcSetLogger(); err != nil {
   738  		return err
   739  	}
   740  
   741  	fc.state.set(cfReady)
   742  	for _, d := range fc.pendingDevices {
   743  		if err := fc.addDevice(d.dev, d.devType); err != nil {
   744  			return err
   745  		}
   746  	}
   747  
   748  	return nil
   749  }
   750  
   751  // startSandbox will start the hypervisor for the given sandbox.
   752  // In the context of firecracker, this will start the hypervisor,
   753  // for configuration, but not yet start the actual virtual machine
   754  func (fc *firecracker) startSandbox(timeout int) error {
   755  	span, _ := fc.trace("startSandbox")
   756  	defer span.Finish()
   757  
   758  	if err := fc.fcInitConfiguration(); err != nil {
   759  		return err
   760  	}
   761  
   762  	data, errJSON := json.MarshalIndent(fc.fcConfig, "", "\t")
   763  	if errJSON != nil {
   764  		return errJSON
   765  	}
   766  
   767  	if err := ioutil.WriteFile(fc.fcConfigPath, data, 0640); err != nil {
   768  		return err
   769  	}
   770  
   771  	var err error
   772  	defer func() {
   773  		if err != nil {
   774  			fc.fcEnd()
   775  		}
   776  	}()
   777  
   778  	// This needs to be done as late as possible, since all processes that
   779  	// are executed by kata-runtime after this call, run with the SELinux
   780  	// label. If these processes require privileged, we do not want to run
   781  	// them under confinement.
   782  	if err := label.SetProcessLabel(fc.config.SELinuxProcessLabel); err != nil {
   783  		return err
   784  	}
   785  	defer label.SetProcessLabel("")
   786  
   787  	err = fc.fcInit(fcTimeout)
   788  	if err != nil {
   789  		return err
   790  	}
   791  
   792  	// make sure 'others' don't have access to this socket
   793  	err = os.Chmod(filepath.Join(fc.jailerRoot, defaultHybridVSocketName), 0640)
   794  	if err != nil {
   795  		return fmt.Errorf("Could not change socket permissions: %v", err)
   796  	}
   797  
   798  	fc.state.set(vmReady)
   799  	return nil
   800  }
   801  
   802  func fcDriveIndexToID(i int) string {
   803  	return "drive_" + strconv.Itoa(i)
   804  }
   805  
   806  func (fc *firecracker) createDiskPool() error {
   807  	span, _ := fc.trace("createDiskPool")
   808  	defer span.Finish()
   809  
   810  	for i := 0; i < fcDiskPoolSize; i++ {
   811  		driveID := fcDriveIndexToID(i)
   812  		isReadOnly := false
   813  		isRootDevice := false
   814  
   815  		// Create a temporary file as a placeholder backend for the drive
   816  		jailedDrive, err := fc.createJailedDrive(driveID)
   817  		if err != nil {
   818  			return err
   819  		}
   820  
   821  		drive := &models.Drive{
   822  			DriveID:      &driveID,
   823  			IsReadOnly:   &isReadOnly,
   824  			IsRootDevice: &isRootDevice,
   825  			PathOnHost:   &jailedDrive,
   826  		}
   827  
   828  		fc.fcConfig.Drives = append(fc.fcConfig.Drives, drive)
   829  	}
   830  
   831  	return nil
   832  }
   833  
   834  func (fc *firecracker) umountResource(jailedPath string) {
   835  	hostPath := filepath.Join(fc.jailerRoot, jailedPath)
   836  	fc.Logger().WithField("resource", hostPath).Debug("Unmounting resource")
   837  	err := syscall.Unmount(hostPath, syscall.MNT_DETACH)
   838  	if err != nil {
   839  		fc.Logger().WithError(err).Error("Failed to umount resource")
   840  	}
   841  }
   842  
   843  // cleanup all jail artifacts
   844  func (fc *firecracker) cleanupJail() {
   845  	span, _ := fc.trace("cleanupJail")
   846  	defer span.Finish()
   847  
   848  	fc.umountResource(fcKernel)
   849  	fc.umountResource(fcRootfs)
   850  	fc.umountResource(fcLogFifo)
   851  	fc.umountResource(fcMetricsFifo)
   852  	fc.umountResource(defaultFcConfig)
   853  	// if running with jailer, we also need to umount fc.jailerRoot
   854  	if fc.config.JailerPath != "" {
   855  		if err := syscall.Unmount(fc.jailerRoot, syscall.MNT_DETACH); err != nil {
   856  			fc.Logger().WithField("JailerRoot", fc.jailerRoot).WithError(err).Error("Failed to umount")
   857  		}
   858  	}
   859  
   860  	fc.Logger().WithField("cleaningJail", fc.vmPath).Info()
   861  	if err := os.RemoveAll(fc.vmPath); err != nil {
   862  		fc.Logger().WithField("cleanupJail failed", err).Error()
   863  	}
   864  }
   865  
   866  // stopSandbox will stop the Sandbox's VM.
   867  func (fc *firecracker) stopSandbox() (err error) {
   868  	span, _ := fc.trace("stopSandbox")
   869  	defer span.Finish()
   870  
   871  	return fc.fcEnd()
   872  }
   873  
// pauseSandbox is a no-op for firecracker; it always returns nil.
func (fc *firecracker) pauseSandbox() error {
	return nil
}
   877  
// saveSandbox is a no-op for firecracker; it always returns nil.
func (fc *firecracker) saveSandbox() error {
	return nil
}
   881  
// resumeSandbox is a no-op for firecracker; it always returns nil.
func (fc *firecracker) resumeSandbox() error {
	return nil
}
   885  
   886  func (fc *firecracker) fcAddVsock(hvs types.HybridVSock) {
   887  	span, _ := fc.trace("fcAddVsock")
   888  	defer span.Finish()
   889  
   890  	udsPath := hvs.UdsPath
   891  	if fc.jailed {
   892  		udsPath = filepath.Join("/", defaultHybridVSocketName)
   893  	}
   894  
   895  	vsockID := "root"
   896  	ctxID := defaultGuestVSockCID
   897  	vsock := &models.Vsock{
   898  		GuestCid: &ctxID,
   899  		UdsPath:  &udsPath,
   900  		VsockID:  &vsockID,
   901  	}
   902  
   903  	fc.fcConfig.Vsock = vsock
   904  }
   905  
   906  func (fc *firecracker) fcAddNetDevice(endpoint Endpoint) {
   907  	span, _ := fc.trace("fcAddNetDevice")
   908  	defer span.Finish()
   909  
   910  	ifaceID := endpoint.Name()
   911  	ifaceCfg := &models.NetworkInterface{
   912  		AllowMmdsRequests: false,
   913  		GuestMac:          endpoint.HardwareAddr(),
   914  		IfaceID:           &ifaceID,
   915  		HostDevName:       &endpoint.NetworkPair().TapInterface.TAPIface.Name,
   916  	}
   917  
   918  	fc.fcConfig.NetworkInterfaces = append(fc.fcConfig.NetworkInterfaces, ifaceCfg)
   919  }
   920  
   921  func (fc *firecracker) fcAddBlockDrive(drive config.BlockDrive) error {
   922  	span, _ := fc.trace("fcAddBlockDrive")
   923  	defer span.Finish()
   924  
   925  	driveID := drive.ID
   926  	isReadOnly := false
   927  	isRootDevice := false
   928  
   929  	jailedDrive, err := fc.fcJailResource(drive.File, driveID)
   930  	if err != nil {
   931  		fc.Logger().WithField("fcAddBlockDrive failed", err).Error()
   932  		return err
   933  	}
   934  	driveFc := &models.Drive{
   935  		DriveID:      &driveID,
   936  		IsReadOnly:   &isReadOnly,
   937  		IsRootDevice: &isRootDevice,
   938  		PathOnHost:   &jailedDrive,
   939  	}
   940  
   941  	fc.fcConfig.Drives = append(fc.fcConfig.Drives, driveFc)
   942  
   943  	return nil
   944  }
   945  
   946  // Firecracker supports replacing the host drive used once the VM has booted up
   947  func (fc *firecracker) fcUpdateBlockDrive(path, id string) error {
   948  	span, _ := fc.trace("fcUpdateBlockDrive")
   949  	defer span.Finish()
   950  
   951  	// Use the global block index as an index into the pool of the devices
   952  	// created for firecracker.
   953  	driveParams := ops.NewPatchGuestDriveByIDParams()
   954  	driveParams.SetDriveID(id)
   955  
   956  	driveFc := &models.PartialDrive{
   957  		DriveID:    &id,
   958  		PathOnHost: &path, //This is the only property that can be modified
   959  	}
   960  
   961  	driveParams.SetBody(driveFc)
   962  	if _, err := fc.client().Operations.PatchGuestDriveByID(driveParams); err != nil {
   963  		return err
   964  	}
   965  
   966  	return nil
   967  }
   968  
   969  // addDevice will add extra devices to firecracker.  Limited to configure before the
   970  // virtual machine starts.  Devices include drivers and network interfaces only.
   971  func (fc *firecracker) addDevice(devInfo interface{}, devType deviceType) error {
   972  	span, _ := fc.trace("addDevice")
   973  	defer span.Finish()
   974  
   975  	fc.state.RLock()
   976  	defer fc.state.RUnlock()
   977  
   978  	if fc.state.state == notReady {
   979  		dev := firecrackerDevice{
   980  			dev:     devInfo,
   981  			devType: devType,
   982  		}
   983  		fc.Logger().Info("FC not ready, queueing device")
   984  		fc.pendingDevices = append(fc.pendingDevices, dev)
   985  		return nil
   986  	}
   987  
   988  	var err error
   989  	switch v := devInfo.(type) {
   990  	case Endpoint:
   991  		fc.Logger().WithField("device-type-endpoint", devInfo).Info("Adding device")
   992  		fc.fcAddNetDevice(v)
   993  	case config.BlockDrive:
   994  		fc.Logger().WithField("device-type-blockdrive", devInfo).Info("Adding device")
   995  		err = fc.fcAddBlockDrive(v)
   996  	case types.HybridVSock:
   997  		fc.Logger().WithField("device-type-hybrid-vsock", devInfo).Info("Adding device")
   998  		fc.fcAddVsock(v)
   999  	default:
  1000  		fc.Logger().WithField("unknown-device-type", devInfo).Error("Adding device")
  1001  	}
  1002  
  1003  	return err
  1004  }
  1005  
  1006  // hotplugBlockDevice supported in Firecracker VMM
  1007  // hot add or remove a block device.
  1008  func (fc *firecracker) hotplugBlockDevice(drive config.BlockDrive, op operation) (interface{}, error) {
  1009  	var path string
  1010  	var err error
  1011  	driveID := fcDriveIndexToID(drive.Index)
  1012  
  1013  	if op == addDevice {
  1014  		//The drive placeholder has to exist prior to Update
  1015  		path, err = fc.fcJailResource(drive.File, driveID)
  1016  		if err != nil {
  1017  			fc.Logger().WithError(err).WithField("resource", drive.File).Error("Could not jail resource")
  1018  			return nil, err
  1019  		}
  1020  	} else {
  1021  		// umount the disk, it's no longer needed.
  1022  		fc.umountResource(driveID)
  1023  		// use previous raw file created at createDiskPool, that way
  1024  		// the resource is released by firecracker and it can be destroyed in the host
  1025  		path = filepath.Join(fc.jailerRoot, driveID)
  1026  	}
  1027  
  1028  	return nil, fc.fcUpdateBlockDrive(path, driveID)
  1029  }
  1030  
  1031  // hotplugAddDevice supported in Firecracker VMM
  1032  func (fc *firecracker) hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error) {
  1033  	span, _ := fc.trace("hotplugAddDevice")
  1034  	defer span.Finish()
  1035  
  1036  	switch devType {
  1037  	case blockDev:
  1038  		return fc.hotplugBlockDevice(*devInfo.(*config.BlockDrive), addDevice)
  1039  	default:
  1040  		fc.Logger().WithFields(logrus.Fields{"devInfo": devInfo,
  1041  			"deviceType": devType}).Warn("hotplugAddDevice: unsupported device")
  1042  		return nil, fmt.Errorf("Could not hot add device: unsupported device: %v, type: %v",
  1043  			devInfo, devType)
  1044  	}
  1045  }
  1046  
  1047  // hotplugRemoveDevice supported in Firecracker VMM
  1048  func (fc *firecracker) hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error) {
  1049  	span, _ := fc.trace("hotplugRemoveDevice")
  1050  	defer span.Finish()
  1051  
  1052  	switch devType {
  1053  	case blockDev:
  1054  		return fc.hotplugBlockDevice(*devInfo.(*config.BlockDrive), removeDevice)
  1055  	default:
  1056  		fc.Logger().WithFields(logrus.Fields{"devInfo": devInfo,
  1057  			"deviceType": devType}).Error("hotplugRemoveDevice: unsupported device")
  1058  		return nil, fmt.Errorf("Could not hot remove device: unsupported device: %v, type: %v",
  1059  			devInfo, devType)
  1060  	}
  1061  }
  1062  
  1063  // getSandboxConsole builds the path of the console where we can read
  1064  // logs coming from the sandbox.
  1065  func (fc *firecracker) getSandboxConsole(id string) (string, error) {
  1066  	return fmt.Sprintf("%s://%s:%d", kataclient.HybridVSockScheme, filepath.Join(fc.jailerRoot, defaultHybridVSocketName), vSockLogsPort), nil
  1067  }
  1068  
  1069  func (fc *firecracker) disconnect() {
  1070  	fc.state.set(notReady)
  1071  }
  1072  
  1073  // Adds all capabilities supported by firecracker implementation of hypervisor interface
  1074  func (fc *firecracker) capabilities() types.Capabilities {
  1075  	span, _ := fc.trace("capabilities")
  1076  	defer span.Finish()
  1077  	var caps types.Capabilities
  1078  	caps.SetBlockDeviceHotplugSupport()
  1079  
  1080  	return caps
  1081  }
  1082  
  1083  func (fc *firecracker) hypervisorConfig() HypervisorConfig {
  1084  	return fc.config
  1085  }
  1086  
  1087  func (fc *firecracker) resizeMemory(reqMemMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, memoryDevice, error) {
  1088  	return 0, memoryDevice{}, nil
  1089  }
  1090  
  1091  func (fc *firecracker) resizeVCPUs(reqVCPUs uint32) (currentVCPUs uint32, newVCPUs uint32, err error) {
  1092  	return 0, 0, nil
  1093  }
  1094  
  1095  // This is used to apply cgroup information on the host.
  1096  //
  1097  // As suggested by https://github.com/firecracker-microvm/firecracker/issues/718,
  1098  // let's use `ps -T -p <pid>` to get fc vcpu info.
  1099  func (fc *firecracker) getThreadIDs() (vcpuThreadIDs, error) {
  1100  	var vcpuInfo vcpuThreadIDs
  1101  
  1102  	vcpuInfo.vcpus = make(map[int]int)
  1103  	parent, err := utils.NewProc(fc.info.PID)
  1104  	if err != nil {
  1105  		return vcpuInfo, err
  1106  	}
  1107  	children, err := parent.Children()
  1108  	if err != nil {
  1109  		return vcpuInfo, err
  1110  	}
  1111  	for _, child := range children {
  1112  		comm, err := child.Comm()
  1113  		if err != nil {
  1114  			return vcpuInfo, errors.New("Invalid fc thread info")
  1115  		}
  1116  		if !strings.HasPrefix(comm, "fc_vcpu") {
  1117  			continue
  1118  		}
  1119  		cpus := strings.SplitAfter(comm, "fc_vcpu")
  1120  		if len(cpus) != 2 {
  1121  			return vcpuInfo, errors.Errorf("Invalid fc thread info: %v", comm)
  1122  		}
  1123  		cpuID, err := strconv.ParseInt(cpus[1], 10, 32)
  1124  		if err != nil {
  1125  			return vcpuInfo, errors.Wrapf(err, "Invalid fc thread info: %v", comm)
  1126  		}
  1127  		vcpuInfo.vcpus[int(cpuID)] = child.PID
  1128  	}
  1129  
  1130  	return vcpuInfo, nil
  1131  }
  1132  
  1133  func (fc *firecracker) cleanup() error {
  1134  	fc.cleanupJail()
  1135  	return nil
  1136  }
  1137  
  1138  func (fc *firecracker) getPids() []int {
  1139  	return []int{fc.info.PID}
  1140  }
  1141  
  1142  func (fc *firecracker) fromGrpc(ctx context.Context, hypervisorConfig *HypervisorConfig, j []byte) error {
  1143  	return errors.New("firecracker is not supported by VM cache")
  1144  }
  1145  
  1146  func (fc *firecracker) toGrpc() ([]byte, error) {
  1147  	return nil, errors.New("firecracker is not supported by VM cache")
  1148  }
  1149  
  1150  func (fc *firecracker) save() (s persistapi.HypervisorState) {
  1151  	s.Pid = fc.info.PID
  1152  	s.Type = string(FirecrackerHypervisor)
  1153  	return
  1154  }
  1155  
  1156  func (fc *firecracker) load(s persistapi.HypervisorState) {
  1157  	fc.info.PID = s.Pid
  1158  }
  1159  
  1160  func (fc *firecracker) check() error {
  1161  	if err := syscall.Kill(fc.info.PID, syscall.Signal(0)); err != nil {
  1162  		return errors.Wrapf(err, "failed to ping fc process")
  1163  	}
  1164  
  1165  	return nil
  1166  }
  1167  
  1168  func (fc *firecracker) generateSocket(id string, useVsock bool) (interface{}, error) {
  1169  	if !useVsock {
  1170  		return nil, fmt.Errorf("Can't start firecracker: vsocks is disabled")
  1171  	}
  1172  
  1173  	fc.Logger().Debug("Using hybrid-vsock endpoint")
  1174  	udsPath := filepath.Join(fc.jailerRoot, defaultHybridVSocketName)
  1175  
  1176  	return types.HybridVSock{
  1177  		UdsPath: udsPath,
  1178  		Port:    uint32(vSockPort),
  1179  	}, nil
  1180  }
  1181  
  1182  func (fc *firecracker) watchConsole() (*os.File, error) {
  1183  	master, slave, err := console.NewPty()
  1184  	if err != nil {
  1185  		fc.Logger().WithField("Error create pseudo tty", err).Debug()
  1186  		return nil, err
  1187  	}
  1188  
  1189  	stdio, err := os.OpenFile(slave, syscall.O_RDWR, 0700)
  1190  	if err != nil {
  1191  		fc.Logger().WithError(err).Debugf("open pseudo tty %s", slave)
  1192  		return nil, err
  1193  	}
  1194  
  1195  	go func() {
  1196  		scanner := bufio.NewScanner(master)
  1197  		for scanner.Scan() {
  1198  			fc.Logger().WithFields(logrus.Fields{
  1199  				"sandbox":   fc.id,
  1200  				"vmconsole": scanner.Text(),
  1201  			}).Infof("reading guest console")
  1202  		}
  1203  
  1204  		if err := scanner.Err(); err != nil {
  1205  			if err == io.EOF {
  1206  				fc.Logger().Info("console watcher quits")
  1207  			} else {
  1208  				fc.Logger().WithError(err).Error("Failed to read guest console")
  1209  			}
  1210  		}
  1211  	}()
  1212  
  1213  	return stdio, nil
  1214  }