github.com/kata-containers/runtime@v0.0.0-20210505125100-04f29832a923/virtcontainers/hypervisor.go (about)

     1  // Copyright (c) 2016 Intel Corporation
     2  //
     3  // SPDX-License-Identifier: Apache-2.0
     4  //
     5  
     6  package virtcontainers
     7  
     8  import (
     9  	"bufio"
    10  	"context"
    11  	"fmt"
    12  	"os"
    13  	"path/filepath"
    14  	"runtime"
    15  	"strconv"
    16  	"strings"
    17  
    18  	"github.com/kata-containers/runtime/virtcontainers/device/config"
    19  	"github.com/kata-containers/runtime/virtcontainers/persist"
    20  	persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api"
    21  	"github.com/kata-containers/runtime/virtcontainers/types"
    22  	"github.com/kata-containers/runtime/virtcontainers/utils"
    23  )
    24  
    25  // HypervisorType describes a hypervisor type.
    26  type HypervisorType string
    27  
    28  type operation int // operation identifies a device operation; its values are the constants below.
    29  
    30  const (
    31  	addDevice operation = iota // a device is being added
    32  	removeDevice // a device is being removed
    33  )
    34  
    35  const (
    36  	// FirecrackerHypervisor is the Firecracker (FC) hypervisor.
    37  	FirecrackerHypervisor HypervisorType = "firecracker"
    38  
    39  	// QemuHypervisor is the QEMU hypervisor.
    40  	QemuHypervisor HypervisorType = "qemu"
    41  
    42  	// AcrnHypervisor is the ACRN hypervisor.
    43  	AcrnHypervisor HypervisorType = "acrn"
    44  
    45  	// ClhHypervisor is the cloud-hypervisor (CLH) hypervisor; newHypervisor maps it to cloudHypervisor.
    46  	ClhHypervisor HypervisorType = "clh"
    47  
    48  	// MockHypervisor is a mock hypervisor for testing purposes.
    49  	MockHypervisor HypervisorType = "mock"
    50  )
    51  
    52  const (
    53  	procMemInfo = "/proc/meminfo" // procfs file describing host memory
    54  	procCPUInfo = "/proc/cpuinfo" // procfs file describing host CPUs
    55  )
    56  
    57  const (
    58  	defaultVCPUs = 1 // applied by HypervisorConfig.valid when NumVCPUs is 0
    59  	// 2 GiB; applied by HypervisorConfig.valid when MemorySize is 0
    60  	defaultMemSzMiB = 2048
    61  
    62  	defaultBridges = 1 // applied by HypervisorConfig.valid when DefaultBridges is 0
    63  
    64  	defaultBlockDriver = config.VirtioSCSI // applied by HypervisorConfig.valid when BlockDeviceDriver is empty
    65  
    66  	defaultSocketName        = "kata.sock" // socket file name handed to utils.BuildSocketPath by generateVMSocket
    67  	defaultSocketDeviceID    = "channel0" // DeviceID of the types.Socket built by generateVMSocket
    68  	defaultSocketChannelName = "agent.channel.0" // Name of the types.Socket built by generateVMSocket
    69  	defaultSocketID          = "charch0" // ID of the types.Socket built by generateVMSocket
    70  
    71  	// Port numbers below 1024 are called privileged ports. Only a process with
    72  	// CAP_NET_BIND_SERVICE capability may bind to these port numbers.
    73  	vSockPort = 1024
    74  
    75  	// Port where the agent will send the logs. Logs are sent through the vsock in cases
    76  	// where the hypervisor has no console.sock, i.e. firecracker.
    77  	vSockLogsPort = 1025
    78  )
    79  
    80  // defaultMaxQemuVCPUs is initialized from MaxQemuVCPUs(): in some architectures the maximum number of vCPUs depends on the number of physical cores.
    81  var defaultMaxQemuVCPUs = MaxQemuVCPUs()
    82  
    83  // commonNvdimmKernelRootParams is the agnostic list of kernel root parameters for NVDIMM (rootflags include dax).
    84  var commonNvdimmKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck
    85  	{"root", "/dev/pmem0p1"},
    86  	{"rootflags", "dax,data=ordered,errors=remount-ro ro"},
    87  	{"rootfstype", "ext4"},
    88  }
    89  
    90  // commonNvdimmNoDAXKernelRootParams is the agnostic list of kernel root parameters for NVDIMM without the dax root flag.
    91  var commonNvdimmNoDAXKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck
    92  	{"root", "/dev/pmem0p1"},
    93  	{"rootflags", "data=ordered,errors=remount-ro ro"},
    94  	{"rootfstype", "ext4"},
    95  }
    96  
    97  // commonVirtioblkKernelRootParams is the agnostic list of kernel root parameters for virtio-blk.
    98  var commonVirtioblkKernelRootParams = []Param{ //nolint: unused, deadcode, varcheck
    99  	{"root", "/dev/vda1"},
   100  	{"rootflags", "data=ordered,errors=remount-ro ro"},
   101  	{"rootfstype", "ext4"},
   102  }
   103  
   104  // deviceType describes a virtualized device type.
   105  type deviceType int
   106  
   107  const (
   108  	// imgDev is the image device type.
   109  	imgDev deviceType = iota
   110  
   111  	// fsDev is the filesystem device type.
   112  	fsDev
   113  
   114  	// netDev is the network device type.
   115  	netDev
   116  
   117  	// blockDev is the block device type.
   118  	blockDev
   119  
   120  	// serialPortDev is the serial port device type.
   121  	serialPortDev
   122  
   123  	// vSockPCIDev is the vhost vsock PCI device type.
   124  	vSockPCIDev
   125  
   126  	// vfioDev is the VFIO device type.
   127  	vfioDev
   128  
   129  	// vhostuserDev is a Vhost-user device type.
   130  	vhostuserDev
   131  
   132  	// cpuDev is the CPU device type.
   133  	cpuDev
   134  
   135  	// memoryDev is the memory device type.
   136  	memoryDev
   137  
   138  	// hybridVirtioVsockDev is a hybrid virtio-vsock device supported
   139  	// only on certain hypervisors, like firecracker.
   140  	hybridVirtioVsockDev
   141  )
   142  
   143  type memoryDevice struct { // memoryDevice describes a memory device; hypervisor.resizeMemory returns one.
   144  	slot   int // presumably the memory slot index — TODO confirm against hypervisor implementations
   145  	sizeMB int // size in MB, per the field name
   146  	addr   uint64 // NOTE(review): address semantics (guest physical?) are not visible here — confirm
   147  	probe  bool // NOTE(review): same name as resizeMemory's probe argument; exact meaning not visible here
   148  }
   149  
   150  // Set sets an hypervisor type based on the input string.
   151  func (hType *HypervisorType) Set(value string) error {
   152  	switch value {
   153  	case "qemu":
   154  		*hType = QemuHypervisor
   155  		return nil
   156  	case "firecracker":
   157  		*hType = FirecrackerHypervisor
   158  		return nil
   159  	case "acrn":
   160  		*hType = AcrnHypervisor
   161  		return nil
   162  	case "clh":
   163  		*hType = ClhHypervisor
   164  		return nil
   165  	case "mock":
   166  		*hType = MockHypervisor
   167  		return nil
   168  	default:
   169  		return fmt.Errorf("Unknown hypervisor type %s", value)
   170  	}
   171  }
   172  
   173  // String converts an hypervisor type to a string.
   174  func (hType *HypervisorType) String() string {
   175  	switch *hType {
   176  	case QemuHypervisor:
   177  		return string(QemuHypervisor)
   178  	case FirecrackerHypervisor:
   179  		return string(FirecrackerHypervisor)
   180  	case AcrnHypervisor:
   181  		return string(AcrnHypervisor)
   182  	case ClhHypervisor:
   183  		return string(ClhHypervisor)
   184  	case MockHypervisor:
   185  		return string(MockHypervisor)
   186  	default:
   187  		return ""
   188  	}
   189  }
   190  
   191  // newHypervisor returns an hypervisor from and hypervisor type.
   192  func newHypervisor(hType HypervisorType) (hypervisor, error) {
   193  	store, err := persist.GetDriver()
   194  	if err != nil {
   195  		return nil, err
   196  	}
   197  
   198  	switch hType {
   199  	case QemuHypervisor:
   200  		return &qemu{
   201  			store: store,
   202  		}, nil
   203  	case FirecrackerHypervisor:
   204  		return &firecracker{}, nil
   205  	case AcrnHypervisor:
   206  		return &Acrn{
   207  			store: store,
   208  		}, nil
   209  	case ClhHypervisor:
   210  		return &cloudHypervisor{
   211  			store: store,
   212  		}, nil
   213  	case MockHypervisor:
   214  		return &mockHypervisor{}, nil
   215  	default:
   216  		return nil, fmt.Errorf("Unknown hypervisor type %s", hType)
   217  	}
   218  }
   219  
   220  // Param is a key/value representation for hypervisor and kernel parameters.
   221  type Param struct {
   222  	Key   string // parameter name; may be empty (SerializeParams then emits only Value)
   223  	Value string // parameter value; may be empty (SerializeParams then emits only Key)
   224  }
   225  
   226  // HypervisorConfig is the hypervisor configuration.
   227  type HypervisorConfig struct {
   228  	// NumVCPUs specifies the default number of vCPUs for the VM.
   229  	NumVCPUs uint32
   230  
   231  	// DefaultMaxVCPUs specifies the maximum number of vCPUs for the VM.
   232  	DefaultMaxVCPUs uint32
   233  
   234  	// MemorySize specifies the default memory size in MiB for the VM.
   235  	MemorySize uint32
   236  
   237  	// DefaultBridges specifies the default number of bridges for the VM.
   238  	// Bridges can be used to hot plug devices.
   239  	DefaultBridges uint32
   240  
   241  	// Msize9p is used as the msize for 9p shares.
   242  	Msize9p uint32
   243  
   244  	// MemSlots specifies the default memory slots for the VM.
   245  	MemSlots uint32
   246  
   247  	// MemOffset specifies the memory space for the nvdimm device.
   248  	MemOffset uint32
   249  
   250  	// VirtioFSCacheSize is the DAX cache size in MiB.
   251  	VirtioFSCacheSize uint32
   252  
   253  	// KernelParams are additional guest kernel parameters.
   254  	KernelParams []Param
   255  
   256  	// HypervisorParams are additional hypervisor parameters.
   257  	HypervisorParams []Param
   258  
   259  	// KernelPath is the guest kernel host path.
   260  	KernelPath string
   261  
   262  	// ImagePath is the guest image host path.
   263  	ImagePath string
   264  
   265  	// InitrdPath is the guest initrd image host path.
   266  	// ImagePath and InitrdPath cannot be set at the same time.
   267  	InitrdPath string
   268  
   269  	// FirmwarePath is the bios host path.
   270  	FirmwarePath string
   271  
   272  	// MachineAccelerators are machine specific accelerators.
   273  	MachineAccelerators string
   274  
   275  	// CPUFeatures are cpu specific features.
   276  	CPUFeatures string
   277  
   278  	// HypervisorPath is the hypervisor executable host path.
   279  	HypervisorPath string
   280  
   281  	// HypervisorPathList is the list of hypervisor path names allowed in annotations.
   282  	HypervisorPathList []string
   283  
   284  	// HypervisorCtlPathList is the list of hypervisor control path names allowed in annotations.
   285  	HypervisorCtlPathList []string
   286  
   287  	// HypervisorCtlPath is the hypervisor ctl executable host path.
   288  	HypervisorCtlPath string
   289  
   290  	// JailerPath is the jailer executable host path.
   291  	JailerPath string
   292  
   293  	// JailerPathList is the list of jailer path names allowed in annotations.
   294  	JailerPathList []string
   295  
   296  	// BlockDeviceDriver specifies the driver to be used for block devices,
   297  	// either VirtioSCSI or VirtioBlock, with the default driver being defaultBlockDriver.
   298  	BlockDeviceDriver string
   299  
   300  	// HypervisorMachineType specifies the type of machine being
   301  	// emulated.
   302  	HypervisorMachineType string
   303  
   304  	// MemoryPath is the memory file path of VM memory. Used when either BootToBeTemplate or
   305  	// BootFromTemplate is true.
   306  	MemoryPath string
   307  
   308  	// DevicesStatePath is the VM device state file path. Used when either BootToBeTemplate or
   309  	// BootFromTemplate is true.
   310  	DevicesStatePath string
   311  
   312  	// EntropySource is the path to a host source of
   313  	// entropy (/dev/random, /dev/urandom or real hardware RNG device).
   314  	EntropySource string
   315  
   316  	// SharedFS is the shared file system type:
   317  	//   - virtio-9p (default)
   318  	//   - virtio-fs
   319  	SharedFS string
   320  
   321  	// VirtioFSDaemon is the virtio-fs vhost-user daemon path.
   322  	VirtioFSDaemon string
   323  
   324  	// VirtioFSDaemonList is the list of valid virtiofs names for annotations.
   325  	VirtioFSDaemonList []string
   326  
   327  	// VirtioFSCache is the cache mode for virtio-fs, or "none".
   328  	VirtioFSCache string
   329  
   330  	// VirtioFSExtraArgs passes options to the virtiofsd daemon.
   331  	VirtioFSExtraArgs []string
   332  
   333  	// FileBackedMemRootDir is the root directory of the file based memory backend.
   334  	FileBackedMemRootDir string
   335  
   336  	// PFlash holds the PFlash image paths.
   337  	PFlash []string
   338  
   339  	// FileBackedMemRootList is the list of valid root directory values for annotations.
   340  	FileBackedMemRootList []string
   341  
   342  	// customAssets is a map of assets.
   343  	// Each value in that map takes precedence over the configured assets.
   344  	// For example, if there is a value for the "kernel" key in this map,
   345  	// it will be used for the sandbox's kernel path instead of KernelPath.
   346  	customAssets map[types.AssetType]*types.Asset
   347  
   348  	// BlockDeviceCacheSet specifies whether cache-related options will be set on block devices.
   349  	BlockDeviceCacheSet bool
   350  
   351  	// BlockDeviceCacheDirect specifies cache-related options for block devices.
   352  	// Denotes whether use of O_DIRECT (bypass the host page cache) is enabled.
   353  	BlockDeviceCacheDirect bool
   354  
   355  	// BlockDeviceCacheNoflush specifies cache-related options for block devices.
   356  	// Denotes whether flush requests for the device are ignored.
   357  	BlockDeviceCacheNoflush bool
   358  
   359  	// DisableBlockDeviceUse disallows a block device from being used.
   360  	DisableBlockDeviceUse bool
   361  
   362  	// EnableIOThreads enables IO to be processed in a separate thread.
   363  	// Supported currently for the virtio-scsi driver.
   364  	EnableIOThreads bool
   365  
   366  	// Debug changes the default hypervisor and kernel parameters to
   367  	// enable debug output where available.
   368  	Debug bool
   369  
   370  	// MemPrealloc specifies if the memory should be pre-allocated.
   371  	MemPrealloc bool
   372  
   373  	// HugePages specifies if the memory should be pre-allocated from huge pages.
   374  	HugePages bool
   375  
   376  	// VirtioMem is used to enable/disable virtio-mem.
   377  	VirtioMem bool
   378  
   379  	// IOMMU specifies if the VM should have a vIOMMU.
   380  	IOMMU bool
   381  
   382  	// IOMMUPlatform is used to indicate if IOMMU_PLATFORM is enabled for supported devices.
   383  	IOMMUPlatform bool
   384  
   385  	// Realtime is used to enable/disable realtime.
   386  	Realtime bool
   387  
   388  	// Mlock is used to control memory locking when Realtime is enabled.
   389  	// Realtime=true and Mlock=false allows for swapping out of VM memory,
   390  	// enabling higher density.
   391  	Mlock bool
   392  
   393  	// DisableNestingChecks is used to override customizations performed
   394  	// when running on top of another VMM.
   395  	DisableNestingChecks bool
   396  
   397  	// UseVSock indicates that a vsock is used for agent communication.
   398  	UseVSock bool
   399  
   400  	// DisableImageNvdimm is used to disable guest rootfs image nvdimm devices.
   401  	DisableImageNvdimm bool
   402  
   403  	// HotplugVFIOOnRootBus is used to indicate if devices need to be hotplugged on the
   404  	// root bus instead of a bridge.
   405  	HotplugVFIOOnRootBus bool
   406  
   407  	// PCIeRootPort is used to indicate the number of PCIe Root Port devices.
   408  	// The PCIe Root Port device is used to hot-plug the PCIe device.
   409  	PCIeRootPort uint32
   410  
   411  	// BootToBeTemplate is used to indicate if the VM is created to be a template VM.
   412  	BootToBeTemplate bool
   413  
   414  	// BootFromTemplate is used to indicate if the VM should be created from a template VM.
   415  	BootFromTemplate bool
   416  
   417  	// DisableVhostNet is used to indicate if host supports vhost_net.
   418  	DisableVhostNet bool
   419  
   420  	// EnableVhostUserStore is used to indicate if host supports vhost-user-blk/scsi.
   421  	EnableVhostUserStore bool
   422  
   423  	// VhostUserStorePath is the directory path where vhost-user devices
   424  	// related folders, sockets and device nodes should be.
   425  	VhostUserStorePath string
   426  
   427  	// VhostUserStorePathList is the list of valid values for vhost-user paths.
   428  	VhostUserStorePathList []string
   429  
   430  	// GuestHookPath is the path within the VM that will be used for 'drop-in' hooks.
   431  	GuestHookPath string
   432  
   433  	// VMid is the id of the VM that creates the hypervisor if the VM is created by the factory.
   434  	// VMid is "" if the hypervisor is not created by the factory.
   435  	VMid string
   436  
   437  	// SELinuxProcessLabel is the SELinux label for the VM.
   438  	SELinuxProcessLabel string
   439  
   440  	// EnableAnnotations lists, by name, the annotations to enable.
   441  	EnableAnnotations []string
   442  }
   443  
   444  // vcpuThreadIDs holds the mapping from vcpu number to thread number; hypervisor.getThreadIDs returns one.
   445  type vcpuThreadIDs struct {
   446  	vcpus map[int]int // key: vcpu number, value: thread number (per the type comment)
   447  }
   448  
   449  func (conf *HypervisorConfig) checkTemplateConfig() error {
   450  	if conf.BootToBeTemplate && conf.BootFromTemplate {
   451  		return fmt.Errorf("Cannot set both 'to be' and 'from' vm tempate")
   452  	}
   453  
   454  	if conf.BootToBeTemplate || conf.BootFromTemplate {
   455  		if conf.MemoryPath == "" {
   456  			return fmt.Errorf("Missing MemoryPath for vm template")
   457  		}
   458  
   459  		if conf.BootFromTemplate && conf.DevicesStatePath == "" {
   460  			return fmt.Errorf("Missing DevicesStatePath to load from vm template")
   461  		}
   462  	}
   463  
   464  	return nil
   465  }
   466  
   467  func (conf *HypervisorConfig) valid() error {
   468  	if conf.KernelPath == "" {
   469  		return fmt.Errorf("Missing kernel path")
   470  	}
   471  
   472  	if conf.ImagePath == "" && conf.InitrdPath == "" {
   473  		return fmt.Errorf("Missing image and initrd path")
   474  	}
   475  
   476  	if err := conf.checkTemplateConfig(); err != nil {
   477  		return err
   478  	}
   479  
   480  	if conf.NumVCPUs == 0 {
   481  		conf.NumVCPUs = defaultVCPUs
   482  	}
   483  
   484  	if conf.MemorySize == 0 {
   485  		conf.MemorySize = defaultMemSzMiB
   486  	}
   487  
   488  	if conf.DefaultBridges == 0 {
   489  		conf.DefaultBridges = defaultBridges
   490  	}
   491  
   492  	if conf.BlockDeviceDriver == "" {
   493  		conf.BlockDeviceDriver = defaultBlockDriver
   494  	}
   495  
   496  	if conf.DefaultMaxVCPUs == 0 {
   497  		conf.DefaultMaxVCPUs = defaultMaxQemuVCPUs
   498  	}
   499  
   500  	if conf.Msize9p == 0 && conf.SharedFS != config.VirtioFS {
   501  		conf.Msize9p = defaultMsize9p
   502  	}
   503  
   504  	return nil
   505  }
   506  
   507  // AddKernelParam allows the addition of new kernel parameters to an existing
   508  // hypervisor configuration.
   509  func (conf *HypervisorConfig) AddKernelParam(p Param) error {
   510  	if p.Key == "" {
   511  		return fmt.Errorf("Empty kernel parameter")
   512  	}
   513  
   514  	conf.KernelParams = append(conf.KernelParams, p)
   515  
   516  	return nil
   517  }
   518  
   519  func (conf *HypervisorConfig) addCustomAsset(a *types.Asset) error {
   520  	if a == nil || a.Path() == "" {
   521  		// We did not get a custom asset, we will use the default one.
   522  		return nil
   523  	}
   524  
   525  	if !a.Valid() {
   526  		return fmt.Errorf("Invalid %s at %s", a.Type(), a.Path())
   527  	}
   528  
   529  	virtLog.Debugf("Using custom %v asset %s", a.Type(), a.Path())
   530  
   531  	if conf.customAssets == nil {
   532  		conf.customAssets = make(map[types.AssetType]*types.Asset)
   533  	}
   534  
   535  	conf.customAssets[a.Type()] = a
   536  
   537  	return nil
   538  }
   539  
   540  func (conf *HypervisorConfig) assetPath(t types.AssetType) (string, error) {
   541  	// Custom assets take precedence over the configured ones
   542  	a, ok := conf.customAssets[t]
   543  	if ok {
   544  		return a.Path(), nil
   545  	}
   546  
   547  	// We could not find a custom asset for the given type, let's
   548  	// fall back to the configured ones.
   549  	switch t {
   550  	case types.KernelAsset:
   551  		return conf.KernelPath, nil
   552  	case types.ImageAsset:
   553  		return conf.ImagePath, nil
   554  	case types.InitrdAsset:
   555  		return conf.InitrdPath, nil
   556  	case types.HypervisorAsset:
   557  		return conf.HypervisorPath, nil
   558  	case types.HypervisorCtlAsset:
   559  		return conf.HypervisorCtlPath, nil
   560  	case types.JailerAsset:
   561  		return conf.JailerPath, nil
   562  	case types.FirmwareAsset:
   563  		return conf.FirmwarePath, nil
   564  	default:
   565  		return "", fmt.Errorf("Unknown asset type %v", t)
   566  	}
   567  }
   568  
   569  func (conf *HypervisorConfig) isCustomAsset(t types.AssetType) bool { // reports whether customAssets has an entry for t
   570  	_, ok := conf.customAssets[t]
   571  	return ok
   572  }
   573  
   574  // KernelAssetPath returns the guest kernel path (a custom kernel asset, when present, takes precedence over KernelPath).
   575  func (conf *HypervisorConfig) KernelAssetPath() (string, error) {
   576  	return conf.assetPath(types.KernelAsset)
   577  }
   578  
   579  // CustomKernelAsset returns true if the kernel asset is a custom one, false otherwise.
   580  func (conf *HypervisorConfig) CustomKernelAsset() bool {
   581  	return conf.isCustomAsset(types.KernelAsset)
   582  }
   583  
   584  // ImageAssetPath returns the guest image path (a custom image asset, when present, takes precedence over ImagePath).
   585  func (conf *HypervisorConfig) ImageAssetPath() (string, error) {
   586  	return conf.assetPath(types.ImageAsset)
   587  }
   588  
   589  // CustomImageAsset returns true if the image asset is a custom one, false otherwise.
   590  func (conf *HypervisorConfig) CustomImageAsset() bool {
   591  	return conf.isCustomAsset(types.ImageAsset)
   592  }
   593  
   594  // InitrdAssetPath returns the guest initrd path (a custom initrd asset, when present, takes precedence over InitrdPath).
   595  func (conf *HypervisorConfig) InitrdAssetPath() (string, error) {
   596  	return conf.assetPath(types.InitrdAsset)
   597  }
   598  
   599  // CustomInitrdAsset returns true if the initrd asset is a custom one, false otherwise.
   600  func (conf *HypervisorConfig) CustomInitrdAsset() bool {
   601  	return conf.isCustomAsset(types.InitrdAsset)
   602  }
   603  
   604  // HypervisorAssetPath returns the VM hypervisor path (a custom hypervisor asset, when present, takes precedence over HypervisorPath).
   605  func (conf *HypervisorConfig) HypervisorAssetPath() (string, error) {
   606  	return conf.assetPath(types.HypervisorAsset)
   607  }
   608  
   609  // HypervisorCtlAssetPath returns the VM hypervisor ctl path (a custom hypervisor ctl asset, when present, takes precedence over HypervisorCtlPath).
   610  func (conf *HypervisorConfig) HypervisorCtlAssetPath() (string, error) {
   611  	return conf.assetPath(types.HypervisorCtlAsset)
   612  }
   613  
   614  // CustomHypervisorAsset returns true if the hypervisor asset is a custom one, false otherwise.
   615  func (conf *HypervisorConfig) CustomHypervisorAsset() bool {
   616  	return conf.isCustomAsset(types.HypervisorAsset)
   617  }
   618  
   619  // FirmwareAssetPath returns the guest firmware path (a custom firmware asset, when present, takes precedence over FirmwarePath).
   620  func (conf *HypervisorConfig) FirmwareAssetPath() (string, error) {
   621  	return conf.assetPath(types.FirmwareAsset)
   622  }
   623  
   624  func appendParam(params []Param, parameter string, value string) []Param {
   625  	return append(params, Param{parameter, value})
   626  }
   627  
   628  // SerializeParams converts []Param to []string
   629  func SerializeParams(params []Param, delim string) []string {
   630  	var parameters []string
   631  
   632  	for _, p := range params {
   633  		if p.Key == "" && p.Value == "" {
   634  			continue
   635  		} else if p.Key == "" {
   636  			parameters = append(parameters, fmt.Sprint(p.Value))
   637  		} else if p.Value == "" {
   638  			parameters = append(parameters, fmt.Sprint(p.Key))
   639  		} else if delim == "" {
   640  			parameters = append(parameters, fmt.Sprint(p.Key))
   641  			parameters = append(parameters, fmt.Sprint(p.Value))
   642  		} else {
   643  			parameters = append(parameters, fmt.Sprintf("%s%s%s", p.Key, delim, p.Value))
   644  		}
   645  	}
   646  
   647  	return parameters
   648  }
   649  
   650  // DeserializeParams converts []string to []Param
   651  func DeserializeParams(parameters []string) []Param {
   652  	var params []Param
   653  
   654  	for _, param := range parameters {
   655  		if param == "" {
   656  			continue
   657  		}
   658  		p := strings.SplitN(param, "=", 2)
   659  		if len(p) == 2 {
   660  			params = append(params, Param{Key: p[0], Value: p[1]})
   661  		} else {
   662  			params = append(params, Param{Key: p[0], Value: ""})
   663  		}
   664  	}
   665  
   666  	return params
   667  }
   668  
   669  func getHostMemorySizeKb(memInfoPath string) (uint64, error) {
   670  	f, err := os.Open(memInfoPath)
   671  	if err != nil {
   672  		return 0, err
   673  	}
   674  	defer f.Close()
   675  
   676  	scanner := bufio.NewScanner(f)
   677  	for scanner.Scan() {
   678  		// Expected format: ["MemTotal:", "1234", "kB"]
   679  		parts := strings.Fields(scanner.Text())
   680  
   681  		// Sanity checks: Skip malformed entries.
   682  		if len(parts) < 3 || parts[0] != "MemTotal:" || parts[2] != "kB" {
   683  			continue
   684  		}
   685  
   686  		sizeKb, err := strconv.ParseUint(parts[1], 0, 64)
   687  		if err != nil {
   688  			continue
   689  		}
   690  
   691  		return sizeKb, nil
   692  	}
   693  
   694  	// Handle errors that may have occurred during the reading of the file.
   695  	if err := scanner.Err(); err != nil {
   696  		return 0, err
   697  	}
   698  
   699  	return 0, fmt.Errorf("unable get MemTotal from %s", memInfoPath)
   700  }
   701  
   702  // RunningOnVMM checks if the system is running inside a VM.
   703  func RunningOnVMM(cpuInfoPath string) (bool, error) {
   704  	if runtime.GOARCH == "arm64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x" {
   705  		virtLog.Info("Unable to know if the system is running inside a VM")
   706  		return false, nil
   707  	}
   708  
   709  	flagsField := "flags"
   710  
   711  	f, err := os.Open(cpuInfoPath)
   712  	if err != nil {
   713  		return false, err
   714  	}
   715  	defer f.Close()
   716  
   717  	scanner := bufio.NewScanner(f)
   718  	for scanner.Scan() {
   719  		// Expected format: ["flags", ":", ...] or ["flags:", ...]
   720  		fields := strings.Fields(scanner.Text())
   721  		if len(fields) < 2 {
   722  			continue
   723  		}
   724  
   725  		if !strings.HasPrefix(fields[0], flagsField) {
   726  			continue
   727  		}
   728  
   729  		for _, field := range fields[1:] {
   730  			if field == "hypervisor" {
   731  				return true, nil
   732  			}
   733  		}
   734  
   735  		// As long as we have been able to analyze the fields from
   736  		// "flags", there is no reason to check what comes next from
   737  		// /proc/cpuinfo, because we already know we are not running
   738  		// on a VMM.
   739  		return false, nil
   740  	}
   741  
   742  	if err := scanner.Err(); err != nil {
   743  		return false, err
   744  	}
   745  
   746  	return false, fmt.Errorf("Couldn't find %q from %q output", flagsField, cpuInfoPath)
   747  }
   748  
   749  func getHypervisorPid(h hypervisor) int {
   750  	pids := h.getPids()
   751  	if len(pids) == 0 {
   752  		return 0
   753  	}
   754  	return pids[0]
   755  }
   756  
   757  func generateVMSocket(id string, useVsock bool, vmStogarePath string) (interface{}, error) {
   758  	if useVsock {
   759  		vhostFd, contextID, err := utils.FindContextID()
   760  		if err != nil {
   761  			return nil, err
   762  		}
   763  
   764  		return types.VSock{
   765  			VhostFd:   vhostFd,
   766  			ContextID: contextID,
   767  			Port:      uint32(vSockPort),
   768  		}, nil
   769  	}
   770  
   771  	path, err := utils.BuildSocketPath(filepath.Join(vmStogarePath, id), defaultSocketName)
   772  	if err != nil {
   773  		return nil, err
   774  	}
   775  
   776  	return types.Socket{
   777  		DeviceID: defaultSocketDeviceID,
   778  		ID:       defaultSocketID,
   779  		HostPath: path,
   780  		Name:     defaultSocketChannelName,
   781  	}, nil
   782  }
   783  
   784  // hypervisor is the virtcontainers hypervisor interface; newHypervisor returns the implementation for a HypervisorType.
   785  // The default hypervisor implementation is Qemu.
   786  type hypervisor interface {
   787  	createSandbox(ctx context.Context, id string, networkNS NetworkNamespace, hypervisorConfig *HypervisorConfig, stateful bool) error
   788  	startSandbox(timeout int) error // NOTE(review): the timeout unit is not stated here — confirm with implementations
   789  	stopSandbox() error
   790  	pauseSandbox() error
   791  	saveSandbox() error
   792  	resumeSandbox() error
   793  	addDevice(devInfo interface{}, devType deviceType) error
   794  	hotplugAddDevice(devInfo interface{}, devType deviceType) (interface{}, error)
   795  	hotplugRemoveDevice(devInfo interface{}, devType deviceType) (interface{}, error)
   796  	resizeMemory(memMB uint32, memoryBlockSizeMB uint32, probe bool) (uint32, memoryDevice, error) // NOTE(review): meaning of the returned uint32 is not visible here — confirm
   797  	resizeVCPUs(vcpus uint32) (uint32, uint32, error) // NOTE(review): meaning of the two returned counts is not visible here — confirm
   798  	getSandboxConsole(sandboxID string) (string, error)
   799  	disconnect()
   800  	capabilities() types.Capabilities
   801  	hypervisorConfig() HypervisorConfig
   802  	getThreadIDs() (vcpuThreadIDs, error)
   803  	cleanup() error
   804  	// getPids returns a slice of hypervisor related process ids.
   805  	// The hypervisor pid must be put at index 0 (getHypervisorPid relies on this).
   806  	getPids() []int
   807  	fromGrpc(ctx context.Context, hypervisorConfig *HypervisorConfig, j []byte) error
   808  	toGrpc() ([]byte, error)
   809  	check() error
   810  
   811  	save() persistapi.HypervisorState // returns the hypervisor state as a persistapi.HypervisorState
   812  	load(persistapi.HypervisorState) // takes a persistapi.HypervisorState, the type save returns
   813  
   814  	// generateSocket generates the socket used for communication between the host and the guest.
   815  	generateSocket(id string, useVsock bool) (interface{}, error)
   816  
   817  	// getVirtDriveOffset returns the virtio-block drive offset for virtio-block hotplugging. The offset takes
   818  	// into account drives pre-allocated by the hypervisor, e.g. for the rootfs.
   819  	getVirtDriveOffset() int
   820  }