gitee.com/leisunstar/runtime@v0.0.0-20200521203717-5cef3e7b53f9/virtcontainers/container.go

     1  // +build linux
     2  
     3  // Copyright (c) 2016 Intel Corporation
     4  // Copyright (c) 2014,2015,2016,2017 Docker, Inc.
     5  // SPDX-License-Identifier: Apache-2.0
     6  
     7  package virtcontainers
     8  
     9  import (
    10  	"context"
    11  	"encoding/hex"
    12  	"fmt"
    13  	"io"
    14  	"os"
    15  	"path/filepath"
    16  	"syscall"
    17  	"time"
    18  
    19  	"github.com/containerd/cgroups"
    20  	vccgroups "github.com/kata-containers/runtime/virtcontainers/pkg/cgroups"
    21  	vcTypes "github.com/kata-containers/runtime/virtcontainers/pkg/types"
    22  	"github.com/kata-containers/runtime/virtcontainers/types"
    23  	"github.com/kata-containers/runtime/virtcontainers/utils"
    24  	specs "github.com/opencontainers/runtime-spec/specs-go"
    25  	opentracing "github.com/opentracing/opentracing-go"
    26  	"github.com/pkg/errors"
    27  	"github.com/sirupsen/logrus"
    28  	"golang.org/x/sys/unix"
    29  
    30  	"github.com/kata-containers/runtime/virtcontainers/device/config"
    31  	"github.com/kata-containers/runtime/virtcontainers/device/manager"
    32  	"github.com/kata-containers/runtime/virtcontainers/pkg/rootless"
    33  	"github.com/kata-containers/runtime/virtcontainers/store"
    34  )
    35  
    36  // https://github.com/torvalds/linux/blob/master/include/uapi/linux/major.h
    37  // This file has definitions for major device numbers.
    38  var cdromMajors = map[int64]string{
    39  	11: "SCSI_CDROM_MAJOR",
    40  	15: "CDU31A_CDROM_MAJOR",
    41  	16: "GOLDSTAR_CDROM_MAJOR",
    42  	17: "OPTICS_CDROM_MAJOR",
    43  	18: "SANYO_CDROM_MAJOR",
    44  	20: "MITSUMI_X_CDROM_MAJOR",
    45  	23: "MITSUMI_CDROM_MAJOR",
    46  	24: "CDU535_CDROM_MAJOR",
    47  	25: "MATSUSHITA_CDROM_MAJOR",
    48  	26: "MATSUSHITA_CDROM2_MAJOR",
    49  	27: "MATSUSHITA_CDROM3_MAJOR",
    50  	28: "MATSUSHITA_CDROM4_MAJOR",
    51  	29: "AZTECH_CDROM_MAJOR",
    52  	32: "CM206_CDROM_MAJOR",
    53  }
    54  
    55  // https://github.com/torvalds/linux/blob/master/include/uapi/linux/major.h
    56  // #define FLOPPY_MAJOR		2
    57  const floppyMajor = int64(2)
    58  
    59  // Process gathers data related to a container process.
    60  type Process struct {
    61  	// Token is the process execution context ID. It must be
    62  	// unique per sandbox.
    63  	// Token is used to manipulate processes for containers
    64  	// that have not started yet, and later identify them
    65  	// uniquely within a sandbox.
    66  	Token string
    67  
    68  	// Pid is the process ID as seen by the host software
    69  	// stack, e.g. CRI-O, containerd. This is typically the
    70  	// shim PID.
    71  	Pid int
    72  
    73  	StartTime time.Time
    74  }
    75  
    76  // ContainerStatus describes a container status.
    77  type ContainerStatus struct {
    78  	ID        string
    79  	State     types.ContainerState
    80  	PID       int
    81  	StartTime time.Time
    82  	RootFs    string
    83  	Spec      *specs.Spec
    84  
    85  	// Annotations allow clients to store arbitrary values,
    86  	// for example to add additional status values required
    87  	// to support particular specifications.
    88  	Annotations map[string]string
    89  }
    90  
    91  // ThrottlingData gathers the data related to container CPU throttling.
    92  type ThrottlingData struct {
    93  	// Number of periods with throttling active
    94  	Periods uint64 `json:"periods,omitempty"`
    95  	// Number of periods when the container hit its throttling limit.
    96  	ThrottledPeriods uint64 `json:"throttled_periods,omitempty"`
    97  	// Aggregate time the container was throttled for in nanoseconds.
    98  	ThrottledTime uint64 `json:"throttled_time,omitempty"`
    99  }
   100  
   101  // CPUUsage denotes the usage of a CPU.
   102  // All CPU stats are aggregate since container inception.
   103  type CPUUsage struct {
   104  	// Total CPU time consumed.
   105  	// Units: nanoseconds.
   106  	TotalUsage uint64 `json:"total_usage,omitempty"`
   107  	// Total CPU time consumed per core.
   108  	// Units: nanoseconds.
   109  	PercpuUsage []uint64 `json:"percpu_usage,omitempty"`
   110  	// Time spent by tasks of the cgroup in kernel mode.
   111  	// Units: nanoseconds.
   112  	UsageInKernelmode uint64 `json:"usage_in_kernelmode"`
   113  	// Time spent by tasks of the cgroup in user mode.
   114  	// Units: nanoseconds.
   115  	UsageInUsermode uint64 `json:"usage_in_usermode"`
   116  }
   117  
   118  // CPUStats describes the cpu stats
   119  type CPUStats struct {
   120  	CPUUsage       CPUUsage       `json:"cpu_usage,omitempty"`
   121  	ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
   122  }
   123  
   124  // MemoryData gathers the data related to memory
   125  type MemoryData struct {
   126  	Usage    uint64 `json:"usage,omitempty"`
   127  	MaxUsage uint64 `json:"max_usage,omitempty"`
   128  	Failcnt  uint64 `json:"failcnt"`
   129  	Limit    uint64 `json:"limit"`
   130  }
   131  
   132  // MemoryStats describes the memory stats
   133  type MemoryStats struct {
   134  	// memory used for cache
   135  	Cache uint64 `json:"cache,omitempty"`
   136  	// usage of memory
   137  	Usage MemoryData `json:"usage,omitempty"`
   138  	// usage of memory swap
   139  	SwapUsage MemoryData `json:"swap_usage,omitempty"`
   140  	// usage of kernel memory
   141  	KernelUsage MemoryData `json:"kernel_usage,omitempty"`
   142  	// usage of kernel TCP memory
   143  	KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
   144  	// if true, memory usage is accounted for throughout a hierarchy of cgroups.
   145  	UseHierarchy bool `json:"use_hierarchy"`
   146  
   147  	Stats map[string]uint64 `json:"stats,omitempty"`
   148  }
   149  
   150  // PidsStats describes the pids stats
   151  type PidsStats struct {
   152  	// number of pids in the cgroup
   153  	Current uint64 `json:"current,omitempty"`
   154  	// active pids hard limit
   155  	Limit uint64 `json:"limit,omitempty"`
   156  }
   157  
   158  // BlkioStatEntry gathers data related to a block device
   159  type BlkioStatEntry struct {
   160  	Major uint64 `json:"major,omitempty"`
   161  	Minor uint64 `json:"minor,omitempty"`
   162  	Op    string `json:"op,omitempty"`
   163  	Value uint64 `json:"value,omitempty"`
   164  }
   165  
   166  // BlkioStats describes block io stats
   167  type BlkioStats struct {
   168  	// number of bytes transferred to and from the block device
   169  	IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
   170  	IoServicedRecursive     []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
   171  	IoQueuedRecursive       []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
   172  	IoServiceTimeRecursive  []BlkioStatEntry `json:"io_service_time_recursive,omitempty"`
   173  	IoWaitTimeRecursive     []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"`
   174  	IoMergedRecursive       []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
   175  	IoTimeRecursive         []BlkioStatEntry `json:"io_time_recursive,omitempty"`
   176  	SectorsRecursive        []BlkioStatEntry `json:"sectors_recursive,omitempty"`
   177  }
   178  
   179  // HugetlbStats describes hugetlb memory stats
   180  type HugetlbStats struct {
   181  	// current res_counter usage for hugetlb
   182  	Usage uint64 `json:"usage,omitempty"`
   183  	// maximum usage ever recorded.
   184  	MaxUsage uint64 `json:"max_usage,omitempty"`
   185  	// number of times hugetlb usage allocations failed.
   186  	Failcnt uint64 `json:"failcnt"`
   187  }
   188  
   189  // CgroupStats describes all cgroup subsystem stats
   190  type CgroupStats struct {
   191  	CPUStats    CPUStats    `json:"cpu_stats,omitempty"`
   192  	MemoryStats MemoryStats `json:"memory_stats,omitempty"`
   193  	PidsStats   PidsStats   `json:"pids_stats,omitempty"`
   194  	BlkioStats  BlkioStats  `json:"blkio_stats,omitempty"`
   195  	// the map is in the format "size of hugepage: stats of the hugepage"
   196  	HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
   197  }
   198  
   199  // NetworkStats describes all network stats.
   200  type NetworkStats struct {
   201  	// Name is the name of the network interface.
   202  	Name string `json:"name,omitempty"`
   203  
   204  	RxBytes   uint64 `json:"rx_bytes,omitempty"`
   205  	RxPackets uint64 `json:"rx_packets,omitempty"`
   206  	RxErrors  uint64 `json:"rx_errors,omitempty"`
   207  	RxDropped uint64 `json:"rx_dropped,omitempty"`
   208  	TxBytes   uint64 `json:"tx_bytes,omitempty"`
   209  	TxPackets uint64 `json:"tx_packets,omitempty"`
   210  	TxErrors  uint64 `json:"tx_errors,omitempty"`
   211  	TxDropped uint64 `json:"tx_dropped,omitempty"`
   212  }
   213  
   214  // ContainerStats describes a container stats.
   215  type ContainerStats struct {
   216  	CgroupStats  *CgroupStats
   217  	NetworkStats []*NetworkStats
   218  }
   219  
   220  // ContainerResources describes container resources
   221  type ContainerResources struct {
   222  	// VCPUs is the number of vCPUs used by the container
   223  	VCPUs uint32
   224  
   225  	// MemByte is the amount of memory, in bytes, used by the container
   226  	MemByte int64
   227  }
   228  
   229  // ContainerConfig describes one container runtime configuration.
   230  type ContainerConfig struct {
   231  	ID string
   232  
   233  	// RootFs is the container workload image on the host.
   234  	RootFs RootFs
   235  
   236  	// ReadonlyRootfs indicates if the rootfs should be mounted readonly
   237  	ReadonlyRootfs bool
   238  
   239  	// Cmd specifies the command to run on a container
   240  	Cmd types.Cmd
   241  
   242  	// Annotations allow clients to store arbitrary values,
   243  	// for example to add additional status values required
   244  	// to support particular specifications.
   245  	Annotations map[string]string
   246  
   247  	Mounts []Mount
   248  
   249  	// Device configuration for devices that must be available within the container.
   250  	DeviceInfos []config.DeviceInfo
   251  
   252  	// Resources describes the container resources
   253  	Resources specs.LinuxResources
   254  
   255  	// Raw OCI specification, it won't be saved to disk.
   256  	CustomSpec *specs.Spec `json:"-"`
   257  }
   258  
   259  // valid checks that the container configuration is valid.
   260  func (c *ContainerConfig) valid() bool {
   261  	if c == nil {
   262  		return false
   263  	}
   264  
   265  	if c.ID == "" {
   266  		return false
   267  	}
   268  
   269  	return true
   270  }
   271  
   272  // SystemMountsInfo describes additional information for system mounts that the agent
   273  // needs to handle
   274  type SystemMountsInfo struct {
   275  	// Indicates if /dev has been passed as a bind mount for the host /dev
   276  	BindMountDev bool
   277  
   278  	// Size of /dev/shm assigned on the host.
   279  	DevShmSize uint
   280  }
   281  
   282  // ContainerDevice describes a device associated with a container
   283  type ContainerDevice struct {
   284  	// ID is the device ID referencing the device in the sandbox's device manager
   285  	ID string
   286  
   287  	// ContainerPath is the device path displayed inside the container
   288  	ContainerPath string
   289  
   290  	// FileMode permission bits for the device.
   291  	FileMode os.FileMode
   292  
   293  	// UID is user ID in the container namespace
   294  	UID uint32
   295  
   296  	// GID is group ID in the container namespace
   297  	GID uint32
   298  }
   299  
   300  // RootFs describes the container's rootfs.
   301  type RootFs struct {
   302  	// Source specifies the BlockDevice path
   303  	Source string
   304  	// Target specifies where the rootfs is mounted, if it has been mounted
   305  	Target string
   306  	// Type specifies the type of filesystem to mount.
   307  	Type string
   308  	// Options specifies zero or more fstab style mount options.
   309  	Options []string
   310  	// Mounted specifies whether the rootfs has been mounted or not
   311  	Mounted bool
   312  }
   313  
   314  // Container describes a single container, its configuration and its runtime state within a sandbox.
   315  // A Container can be created, deleted, started, stopped, listed, entered, paused and restored.
   316  type Container struct {
   317  	id        string
   318  	sandboxID string
   319  
   320  	rootFs RootFs
   321  
   322  	config *ContainerConfig
   323  
   324  	sandbox *Sandbox
   325  
   326  	containerPath string
   327  	rootfsSuffix  string
   328  
   329  	state types.ContainerState
   330  
   331  	process Process
   332  
   333  	mounts []Mount
   334  
   335  	devices []ContainerDevice
   336  
   337  	systemMountsInfo SystemMountsInfo
   338  
   339  	ctx context.Context
   340  
   341  	store *store.VCStore
   342  }
   343  
   344  // ID returns the container identifier string.
   345  func (c *Container) ID() string {
   346  	return c.id
   347  }
   348  
   349  // Logger returns a logrus logger appropriate for logging Container messages
   350  func (c *Container) Logger() *logrus.Entry {
   351  	return virtLog.WithFields(logrus.Fields{
   352  		"subsystem": "container",
   353  		"sandbox":   c.sandboxID,
   354  	})
   355  }
   356  
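        // trace starts an opentracing span for the given operation name, tags it
        // with the "container" subsystem and returns it with the derived context.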
   357  func (c *Container) trace(name string) (opentracing.Span, context.Context) {
   358  	if c.ctx == nil {
   359  		c.Logger().WithField("type", "bug").Error("trace called before context set")
   360  		c.ctx = context.Background()
   361  	}
   362  
   363  	span, ctx := opentracing.StartSpanFromContext(c.ctx, name)
   364  
   365  	span.SetTag("subsystem", "container")
   366  
   367  	return span, ctx
   368  }
   369  
   370  // Sandbox returns the sandbox handler related to this container.
   371  func (c *Container) Sandbox() VCSandbox {
   372  	return c.sandbox
   373  }
   374  
   375  // Process returns the container process.
   376  func (c *Container) Process() Process {
   377  	return c.process
   378  }
   379  
   380  // GetToken returns the token related to this container's process.
   381  func (c *Container) GetToken() string {
   382  	return c.process.Token
   383  }
   384  
   385  // GetPid returns the pid related to this container's process.
   386  func (c *Container) GetPid() int {
   387  	return c.process.Pid
   388  }
   389  
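        // setStateFstype records the filesystem type of the container rootfs in the
        // in-memory container state.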
   390  func (c *Container) setStateFstype(fstype string) error {
   391  	c.state.Fstype = fstype
   392  
   393  	return nil
   394  }
   395  
   396  // GetAnnotations returns container's annotations
   397  func (c *Container) GetAnnotations() map[string]string {
   398  	return c.config.Annotations
   399  }
   400  
   401  // GetPatchedOCISpec returns container's OCI specification
   402  // This OCI specification was patched when the sandbox was created
   403  // by containerCapabilities(), SetEphemeralStorageType() and others
   404  // in order to support:
   405  // * capabilities
   406  // * Ephemeral storage
   407  // * k8s empty dir
   408  // If you need the original (vanilla) OCI spec,
   409  // use compatoci.GetContainerSpec() instead.
   410  func (c *Container) GetPatchedOCISpec() *specs.Spec {
   411  	return c.config.CustomSpec
   412  }
   413  
   414  // storeContainer stores a container config.
   415  func (c *Container) storeContainer() error {
   416  	if err := c.sandbox.Save(); err != nil {
   417  		return err
   418  	}
   419  	return nil
   420  }
   421  
   422  // setContainerState sets both the in-memory and on-disk state of the
   423  // container.
   424  func (c *Container) setContainerState(state types.StateString) error {
   425  	if state == "" {
   426  		return vcTypes.ErrNeedState
   427  	}
   428  
   429  	c.Logger().Debugf("Setting container state from %v to %v", c.state.State, state)
   430  	// update in-memory state
   431  	c.state.State = state
   432  
   433  	if useOldStore(c.sandbox.ctx) {
   434  		// the experimental runtime uses "persist.json", which doesn't need "state.json" anymore
   435  		// update on-disk state
   436  		if err := c.store.Store(store.State, c.state); err != nil {
   437  			return err
   438  		}
   439  	} else {
   440  		// flush data to storage
   441  		if err := c.sandbox.Save(); err != nil {
   442  			return err
   443  		}
   444  	}
   445  
   446  	return nil
   447  }
   448  
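        // shareFiles makes a mount source available to the guest: when the hypervisor
        // does not support filesystem sharing, the file is copied into the guest by the
        // agent, otherwise it is bind mounted into the host shared directory. It returns
        // the guest destination path and whether the mount should be ignored.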
   449  func (c *Container) shareFiles(m Mount, idx int, hostSharedDir, guestSharedDir string) (string, bool, error) {
   450  	randBytes, err := utils.GenerateRandomBytes(8)
   451  	if err != nil {
   452  		return "", false, err
   453  	}
   454  
   455  	filename := fmt.Sprintf("%s-%s-%s", c.id, hex.EncodeToString(randBytes), filepath.Base(m.Destination))
   456  	guestDest := filepath.Join(guestSharedDir, filename)
   457  
   458  	// Copy the file to the container's rootfs if filesystem sharing is not supported, otherwise
   459  	// bind mount it in the shared directory.
   460  	caps := c.sandbox.hypervisor.capabilities()
   461  	if !caps.IsFsSharingSupported() {
   462  		c.Logger().Debug("filesystem sharing is not supported, files will be copied")
   463  
   464  		fileInfo, err := os.Stat(m.Source)
   465  		if err != nil {
   466  			return "", false, err
   467  		}
   468  
   469  		// Ignore the mount if this is not a regular file (excludes
   470  		// directory, socket, device, ...) as it cannot be handled by
   471  		// a simple copy. But this should not be treated as an error,
   472  		// only as a limitation.
   473  		if !fileInfo.Mode().IsRegular() {
   474  			c.Logger().WithField("ignored-file", m.Source).Debug("Ignoring non-regular file as FS sharing not supported")
   475  			return "", true, nil
   476  		}
   477  
   478  		if err := c.sandbox.agent.copyFile(m.Source, guestDest); err != nil {
   479  			return "", false, err
   480  		}
   481  	} else {
   482  		// These mounts are created in the shared dir
   483  		mountDest := filepath.Join(hostSharedDir, c.sandbox.id, filename)
   484  		if err := bindMount(c.ctx, m.Source, mountDest, false, "private"); err != nil {
   485  			return "", false, err
   486  		}
   487  		// Save HostPath mount value into the mount list of the container.
   488  		c.mounts[idx].HostPath = mountDest
   489  	}
   490  
   491  	return guestDest, false, nil
   492  }
   493  
   494  // mountSharedDirMounts handles bind-mounts by bindmounting to the host shared
   495  // directory which is mounted through 9pfs in the VM.
   496  // It also updates the container mount list with the HostPath info, and stores
   497  // container mounts to the storage. This way, the HostPath info will be
   498  // available when we need to unmount those mounts.
   499  func (c *Container) mountSharedDirMounts(hostSharedDir, guestSharedDir string) (sharedDirMounts map[string]Mount, ignoredMounts map[string]Mount, err error) {
   500  	sharedDirMounts = make(map[string]Mount)
   501  	ignoredMounts = make(map[string]Mount)
   502  	var devicesToDetach []string
   503  	defer func() {
   504  		if err != nil {
   505  			for _, id := range devicesToDetach {
   506  				c.sandbox.devManager.DetachDevice(id, c.sandbox)
   507  			}
   508  		}
   509  	}()
   510  	for idx, m := range c.mounts {
   511  		// Skip mounting certain system paths from the source on the host side
   512  		// into the container as it does not make sense to do so.
   513  		// Example sources could be /sys/fs/cgroup etc.
   514  		if isSystemMount(m.Source) {
   515  			continue
   516  		}
   517  
   518  		if m.Type != "bind" {
   519  			continue
   520  		}
   521  
   522  		// We need to treat /dev/shm as a special case. This is passed as a bind mount in the spec,
   523  		// but it does not make sense to pass this as a 9p mount from the host side.
   524  		// This needs to be handled purely in the guest, by allocating memory for this inside the VM.
   525  		if m.Destination == "/dev/shm" {
   526  			continue
   527  		}
   528  
   529  		// Check if mount is a block device file. If it is, the block device will be attached to the host
   530  		// instead of passing this as a shared mount.
   531  		if len(m.BlockDeviceID) > 0 {
   532  			// Attach this block device, all other devices passed in the config have been attached at this point
   533  			if err = c.sandbox.devManager.AttachDevice(m.BlockDeviceID, c.sandbox); err != nil {
   534  				return nil, nil, err
   535  			}
   536  			devicesToDetach = append(devicesToDetach, m.BlockDeviceID)
   537  			continue
   538  		}
   539  
   540  		// Ignore /dev, directories and all other device files. We handle
   541  		// only regular files in /dev. It does not make sense to pass the host
   542  		// device nodes to the guest.
   543  		if isHostDevice(m.Destination) {
   544  			continue
   545  		}
   546  
   547  		var ignore bool
   548  		var guestDest string
   549  		guestDest, ignore, err = c.shareFiles(m, idx, hostSharedDir, guestSharedDir)
   550  		if err != nil {
   551  			return nil, nil, err
   552  		}
   553  
   554  		// Expand the list of mounts to ignore.
   555  		if ignore {
   556  			ignoredMounts[m.Source] = Mount{Source: m.Source}
   557  			continue
   558  		}
   559  
   560  		// Check if mount is readonly, let the agent handle the readonly mount
   561  		// within the VM.
   562  		readonly := false
   563  		for _, flag := range m.Options {
   564  			if flag == "ro" {
   565  				readonly = true
   566  				break
   567  			}
   568  		}
   569  
   570  		sharedDirMount := Mount{
   571  			Source:      guestDest,
   572  			Destination: m.Destination,
   573  			Type:        m.Type,
   574  			Options:     m.Options,
   575  			ReadOnly:    readonly,
   576  		}
   577  
   578  		sharedDirMounts[sharedDirMount.Destination] = sharedDirMount
   579  	}
   580  
   581  	return sharedDirMounts, ignoredMounts, nil
   582  }
   583  
   584  func (c *Container) unmountHostMounts() error {
   585  	var span opentracing.Span
   586  	span, c.ctx = c.trace("unmountHostMounts")
   587  	defer span.Finish()
   588  
   589  	for _, m := range c.mounts {
   590  		if m.HostPath != "" {
   591  			span, _ := c.trace("unmount")
   592  			span.SetTag("host-path", m.HostPath)
   593  
   594  			if err := syscall.Unmount(m.HostPath, syscall.MNT_DETACH|UmountNoFollow); err != nil {
   595  				c.Logger().WithFields(logrus.Fields{
   596  					"host-path": m.HostPath,
   597  					"error":     err,
   598  				}).Warn("Could not umount")
   599  				return err
   600  			}
   601  
   602  			if m.Type == "bind" {
   603  				s, err := os.Stat(m.HostPath)
   604  				if err != nil {
   605  					return errors.Wrapf(err, "Could not stat host-path %v", m.HostPath)
   606  				}
   607  				// Remove the empty file or directory
   608  				if s.Mode().IsRegular() && s.Size() == 0 {
   609  					os.Remove(m.HostPath)
   610  				}
   611  				if s.Mode().IsDir() {
   612  					syscall.Rmdir(m.HostPath)
   613  				}
   614  			}
   615  
   616  			span.Finish()
   617  		}
   618  	}
   619  
   620  	return nil
   621  }
   622  
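        // filterDevices drops CD-ROM and floppy devices from the list, as these device
        // types are not attached to the guest.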
   623  func filterDevices(c *Container, devices []ContainerDevice) (ret []ContainerDevice) {
   624  	for _, dev := range devices {
   625  		major, _ := c.sandbox.devManager.GetDeviceByID(dev.ID).GetMajorMinor()
   626  		if _, ok := cdromMajors[major]; ok {
   627  			c.Logger().WithFields(logrus.Fields{
   628  				"device": dev.ContainerPath,
   629  			}).Info("Not attaching device because it is a CDROM")
   630  			continue
   631  		}
   632  
   633  		if major == floppyMajor {
   634  			c.Logger().WithFields(logrus.Fields{
   635  				"device": dev.ContainerPath,
   636  			}).Info("Not attaching device because it is a floppy drive")
   637  			continue
   638  		}
   639  
   640  		ret = append(ret, dev)
   641  	}
   642  	return
   643  }
   644  
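        // createBlockDevices creates a block (or pmem) device for every bind mount whose
        // source is a block device node or can be used as a pmem device, and records the
        // resulting device ID in the corresponding mount entry.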
   645  func (c *Container) createBlockDevices() error {
   646  	if !c.checkBlockDeviceSupport() {
   647  		c.Logger().Warn("Block device not supported")
   648  		return nil
   649  	}
   650  
   651  	// iterate over all mounts and create a block device if the mount is block based.
   652  	for i, m := range c.mounts {
   653  		if len(m.BlockDeviceID) > 0 || m.Type != "bind" {
   654  			// Non-empty m.BlockDeviceID indicates there's already one device
   655  			// associated with the mount, so there is no need to create a new device for it;
   656  			// we only create block devices for bind mounts
   657  			continue
   658  		}
   659  
   660  		var stat unix.Stat_t
   661  		if err := unix.Stat(m.Source, &stat); err != nil {
   662  			return fmt.Errorf("stat %q failed: %v", m.Source, err)
   663  		}
   664  
   665  		var di *config.DeviceInfo
   666  		var err error
   667  
   668  		// Check if mount is a block device file. If it is, the block device will be attached to the host
   669  		// instead of passing this as a shared mount.
   670  		if stat.Mode&unix.S_IFBLK == unix.S_IFBLK {
   671  			di = &config.DeviceInfo{
   672  				HostPath:      m.Source,
   673  				ContainerPath: m.Destination,
   674  				DevType:       "b",
   675  				Major:         int64(unix.Major(stat.Rdev)),
   676  				Minor:         int64(unix.Minor(stat.Rdev)),
   677  			}
   678  			// check whether source can be used as a pmem device
   679  		} else if di, err = config.PmemDeviceInfo(m.Source, m.Destination); err != nil {
   680  			c.Logger().WithError(err).
   681  				WithField("mount-source", m.Source).
   682  				Debug("no loop device")
   683  		}
   684  
   685  		if err == nil && di != nil {
   686  			b, err := c.sandbox.devManager.NewDevice(*di)
   687  
   688  			if err != nil {
   689  				// Do not return an error, try to create
   690  				// devices for other mounts
   691  				c.Logger().WithError(err).WithField("mount-source", m.Source).
   692  					Error("device manager failed to create new device")
   693  				continue
   694  
   695  			}
   696  
   697  			c.mounts[i].BlockDeviceID = b.DeviceID()
   698  		}
   699  	}
   700  
   701  	return nil
   702  }
   703  
   704  // newContainer creates a Container structure from a sandbox and a container configuration.
   705  func newContainer(sandbox *Sandbox, contConfig *ContainerConfig) (*Container, error) {
   706  	span, _ := sandbox.trace("newContainer")
   707  	defer span.Finish()
   708  
   709  	if !contConfig.valid() {
   710  		return &Container{}, fmt.Errorf("Invalid container configuration")
   711  	}
   712  
   713  	c := &Container{
   714  		id:            contConfig.ID,
   715  		sandboxID:     sandbox.id,
   716  		rootFs:        contConfig.RootFs,
   717  		config:        contConfig,
   718  		sandbox:       sandbox,
   719  		containerPath: filepath.Join(sandbox.id, contConfig.ID),
   720  		rootfsSuffix:  "rootfs",
   721  		state:         types.ContainerState{},
   722  		process:       Process{},
   723  		mounts:        contConfig.Mounts,
   724  		ctx:           sandbox.ctx,
   725  	}
   726  
   727  	if useOldStore(sandbox.ctx) {
   728  		ctrStore, err := store.NewVCContainerStore(sandbox.ctx, c.sandboxID, c.id)
   729  		if err != nil {
   730  			return nil, err
   731  		}
   732  		c.store = ctrStore
   733  		state, err := c.store.LoadContainerState()
   734  		if err == nil {
   735  			c.state = state
   736  		}
   737  
   738  		var process Process
   739  		if err := c.store.Load(store.Process, &process); err == nil {
   740  			c.process = process
   741  		}
   742  	} else {
   743  		// the experimental runtime uses "persist.json" instead of the legacy "state.json" as storage
   744  		err := c.Restore()
   745  		if err == nil {
   746  			// container restored
   747  			return c, nil
   748  		}
   749  
   750  		// Unexpected error
   751  		if !os.IsNotExist(err) && err != errContainerPersistNotExist {
   752  			return nil, err
   753  		}
   754  	}
   755  
   756  	// Go to next step for a newly created container
   757  	if err := c.createMounts(); err != nil {
   758  		return nil, err
   759  	}
   760  
   761  	if err := c.createDevices(contConfig); err != nil {
   762  		return nil, err
   763  	}
   764  
   765  	return c, nil
   766  }
   767  
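        // loadMounts restores the container mount list from the legacy container store.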
   768  func (c *Container) loadMounts() ([]Mount, error) {
   769  	var mounts []Mount
   770  	if err := c.store.Load(store.Mounts, &mounts); err != nil {
   771  		return []Mount{}, err
   772  	}
   773  
   774  	return mounts, nil
   775  }
   776  
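        // loadDevices restores the container device list from the legacy container store.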
   777  func (c *Container) loadDevices() ([]ContainerDevice, error) {
   778  	var devices []ContainerDevice
   779  
   780  	if err := c.store.Load(store.DeviceIDs, &devices); err != nil {
   781  		return []ContainerDevice{}, err
   782  	}
   783  
   784  	return devices, nil
   785  }
   786  
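        // createMounts restores the mount list from the legacy store when available,
        // otherwise it creates block devices for the container mounts.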
   787  func (c *Container) createMounts() error {
   788  	if useOldStore(c.sandbox.ctx) {
   789  		mounts, err := c.loadMounts()
   790  		if err == nil {
   791  			// restore mounts from disk
   792  			c.mounts = mounts
   793  			return nil
   794  		}
   795  	}
   796  
   797  	// Create block devices for newly created container
   798  	if err := c.createBlockDevices(); err != nil {
   799  		return err
   800  	}
   801  
   802  	return nil
   803  }
   804  
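        // createDevices restores the container devices from the legacy store when
        // available, otherwise it creates them from the configured DeviceInfos and
        // filters out unsupported device types.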
   805  func (c *Container) createDevices(contConfig *ContainerConfig) error {
   806  	// If the sandbox supports "newstore", only newly created containers can reach this function,
   807  	// so we don't call restore when `supportNewStore` is true
   808  	if useOldStore(c.sandbox.ctx) {
   809  		// Devices will be found in storage after create stage has completed.
   810  		// We load devices from storage at all other stages.
   811  		storedDevices, err := c.loadDevices()
   812  		if err == nil {
   813  			c.devices = storedDevices
   814  			return nil
   815  		}
   816  	}
   817  
   818  	// If devices were not found in storage, create Device implementations
   819  	// from the configuration. This should happen at create.
   820  	var storedDevices []ContainerDevice
   821  	for _, info := range contConfig.DeviceInfos {
   822  		dev, err := c.sandbox.devManager.NewDevice(info)
   823  		if err != nil {
   824  			return err
   825  		}
   826  
   827  		storedDevices = append(storedDevices, ContainerDevice{
   828  			ID:            dev.DeviceID(),
   829  			ContainerPath: info.ContainerPath,
   830  			FileMode:      info.FileMode,
   831  			UID:           info.UID,
   832  			GID:           info.GID,
   833  		})
   834  	}
   835  	c.devices = filterDevices(c, storedDevices)
   836  	return nil
   837  }
   838  
   839  // rollbackFailingContainerCreation rolls back important steps that might have
   840  // been performed before the container creation failed.
   841  // - Unplug CPU and memory resources from the VM.
   842  // - Unplug devices from the VM.
   843  func (c *Container) rollbackFailingContainerCreation() {
   844  	if err := c.detachDevices(); err != nil {
   845  		c.Logger().WithError(err).Error("rollback failed detachDevices()")
   846  	}
   847  	if err := c.removeDrive(); err != nil {
   848  		c.Logger().WithError(err).Error("rollback failed removeDrive()")
   849  	}
   850  	if err := c.unmountHostMounts(); err != nil {
   851  		c.Logger().WithError(err).Error("rollback failed unmountHostMounts()")
   852  	}
   853  	if err := bindUnmountContainerRootfs(c.ctx, kataHostSharedDir(), c.sandbox.id, c.id); err != nil {
   854  		c.Logger().WithError(err).Error("rollback failed bindUnmountContainerRootfs()")
   855  	}
   856  }
   857  
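        // checkBlockDeviceSupport returns true if block device use is not disabled in the
        // hypervisor config and both the agent and the hypervisor support block devices
        // and block device hotplug.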
   858  func (c *Container) checkBlockDeviceSupport() bool {
   859  	if !c.sandbox.config.HypervisorConfig.DisableBlockDeviceUse {
   860  		agentCaps := c.sandbox.agent.capabilities()
   861  		hypervisorCaps := c.sandbox.hypervisor.capabilities()
   862  
   863  		if agentCaps.IsBlockDeviceSupported() && hypervisorCaps.IsBlockDeviceHotplugSupported() {
   864  			return true
   865  		}
   866  	}
   867  
   868  	return false
   869  }
   870  
   871  // create creates and starts a container inside a Sandbox. It has to be
   872  // called only when a new container, not known by the sandbox, has to be created.
   873  func (c *Container) create() (err error) {
   874  	// In case the container creation fails, the following takes care
   875  	// of rolling back all the actions previously performed.
   876  	defer func() {
   877  		if err != nil {
   878  			c.rollbackFailingContainerCreation()
   879  		}
   880  	}()
   881  
   882  	if c.checkBlockDeviceSupport() {
   883  		if err = c.hotplugDrive(); err != nil {
   884  			return
   885  		}
   886  	}
   887  
   888  	var (
   889  		machineType        = c.sandbox.config.HypervisorConfig.HypervisorMachineType
   890  		normalAttachedDevs []ContainerDevice // for q35: normally attached devices
   891  		delayAttachedDevs  []ContainerDevice // for q35: delayed attached devices, for example, large BAR space devices
   892  	)
   893  	// Fix: https://github.com/kata-containers/runtime/issues/2460
   894  	if machineType == QemuQ35 {
   895  		// add large BAR space devices to delayAttachedDevs
   896  		for _, device := range c.devices {
   897  			var isLargeBarSpace bool
   898  			isLargeBarSpace, err = manager.IsVFIOLargeBarSpaceDevice(device.ContainerPath)
   899  			if err != nil {
   900  				return
   901  			}
   902  			if isLargeBarSpace {
   903  				delayAttachedDevs = append(delayAttachedDevs, device)
   904  			} else {
   905  				normalAttachedDevs = append(normalAttachedDevs, device)
   906  			}
   907  		}
   908  	} else {
   909  		normalAttachedDevs = c.devices
   910  	}
   911  
   912  	c.Logger().WithFields(logrus.Fields{
   913  		"machine_type": machineType,
   914  		"devices":      normalAttachedDevs,
   915  	}).Info("normal attach devices")
   916  	if len(normalAttachedDevs) > 0 {
   917  		if err = c.attachDevices(normalAttachedDevs); err != nil {
   918  			return
   919  		}
   920  	}
   921  
   922  	// Deduce additional system mount info that should be handled by the agent
   923  	// inside the VM
   924  	c.getSystemMountInfo()
   925  
   926  	process, err := c.sandbox.agent.createContainer(c.sandbox, c)
   927  	if err != nil {
   928  		return err
   929  	}
   930  	c.process = *process
   931  
   932  	// lazily attach devices after createContainer for q35
   933  	if machineType == QemuQ35 && len(delayAttachedDevs) > 0 {
   934  		c.Logger().WithFields(logrus.Fields{
   935  			"machine_type": machineType,
   936  			"devices":      delayAttachedDevs,
   937  		}).Info("lazy attach devices")
   938  		if err = c.attachDevices(delayAttachedDevs); err != nil {
   939  			return
   940  		}
   941  	}
   942  
   943  	if !rootless.IsRootless() && !c.sandbox.config.SandboxCgroupOnly {
   944  		if err = c.cgroupsCreate(); err != nil {
   945  			return
   946  		}
   947  	}
   948  
   949  	if err = c.setContainerState(types.StateReady); err != nil {
   950  		return
   951  	}
   952  
   953  	return nil
   954  }
   955  
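        // delete removes a ready or stopped container from the sandbox, deletes its host
        // cgroups when applicable and persists the sandbox state.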
   956  func (c *Container) delete() error {
   957  	if c.state.State != types.StateReady &&
   958  		c.state.State != types.StateStopped {
   959  		return fmt.Errorf("Container not ready or stopped, impossible to delete")
   960  	}
   961  
   962  	// Remove the container from sandbox structure
   963  	if err := c.sandbox.removeContainer(c.id); err != nil {
   964  		return err
   965  	}
   966  
   967  	// If running rootless, there are no cgroups to remove
   968  	if !c.sandbox.config.SandboxCgroupOnly || !rootless.IsRootless() {
   969  		if err := c.cgroupsDelete(); err != nil {
   970  			return err
   971  		}
   972  	}
   973  
   974  	return c.sandbox.storeSandbox()
   975  }
   976  
   977  // checkSandboxRunning validates that the sandbox is in a running state.
   978  //
   979  // cmd specifies the operation (or verb) that the check is performed
   980  // for and is only used to make the returned error as descriptive as
   981  // possible.
   982  func (c *Container) checkSandboxRunning(cmd string) error {
   983  	if cmd == "" {
   984  		return fmt.Errorf("Cmd cannot be empty")
   985  	}
   986  
   987  	if c.sandbox.state.State != types.StateRunning {
   988  		return fmt.Errorf("Sandbox not running, impossible to %s the container", cmd)
   989  	}
   990  
   991  	return nil
   992  }
   993  
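        // getSystemMountInfo deduces system mount information, such as whether /dev is
        // bind mounted from the host, that the agent needs to handle inside the VM.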
   994  func (c *Container) getSystemMountInfo() {
   995  	// check if /dev needs to be bind mounted from host /dev
   996  	c.systemMountsInfo.BindMountDev = false
   997  
   998  	for _, m := range c.mounts {
   999  		if m.Source == "/dev" && m.Destination == "/dev" && m.Type == "bind" {
  1000  			c.systemMountsInfo.BindMountDev = true
  1001  		}
  1002  	}
  1003  
  1004  	// TODO Deduce /dev/shm size. See https://github.com/clearcontainers/runtime/issues/138
  1005  }
  1006  
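        // start moves a ready or stopped container to the running state by asking the
        // agent to start it inside the VM.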
  1007  func (c *Container) start() error {
  1008  	if err := c.checkSandboxRunning("start"); err != nil {
  1009  		return err
  1010  	}
  1011  
  1012  	if c.state.State != types.StateReady &&
  1013  		c.state.State != types.StateStopped {
  1014  		return fmt.Errorf("Container not ready or stopped, impossible to start")
  1015  	}
  1016  
  1017  	if err := c.state.ValidTransition(c.state.State, types.StateRunning); err != nil {
  1018  		return err
  1019  	}
  1020  
  1021  	if err := c.sandbox.agent.startContainer(c.sandbox, c); err != nil {
  1022  		c.Logger().WithError(err).Error("Failed to start container")
  1023  
  1024  		if err := c.stop(true); err != nil {
  1025  			c.Logger().WithError(err).Warn("Failed to stop container")
  1026  		}
  1027  		return err
  1028  	}
  1029  
  1030  	return c.setContainerState(types.StateRunning)
  1031  }
  1032  
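        // stop terminates the container process and its shim, asks the agent to stop the
        // container, unmounts host mounts, detaches devices and moves the container to
        // the stopped state. When force is true, most errors are logged and ignored.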
  1033  func (c *Container) stop(force bool) error {
  1034  	span, _ := c.trace("stop")
  1035  	defer span.Finish()
  1036  
  1037  	// In case the container status has been updated implicitly because
  1038  	// the container process has terminated, it might be possible that
  1039  	// someone tries to stop the container, and we don't want to issue an
  1040  	// error in that case. This should be a no-op.
  1041  	//
  1042  	// This has to be handled before the transition validation since this
  1043  	// is an exception.
  1044  	if c.state.State == types.StateStopped {
  1045  		c.Logger().Info("Container already stopped")
  1046  		return nil
  1047  	}
  1048  
  1049  	if err := c.state.ValidTransition(c.state.State, types.StateStopped); err != nil {
  1050  		return err
  1051  	}
  1052  
  1053  	defer func() {
  1054  		span, _ := c.trace("stopShim")
  1055  		defer span.Finish()
  1056  
  1057  		// If the shim is still running, something went wrong.
  1058  		// Make sure we stop the shim process
  1059  		if running, _ := isShimRunning(c.process.Pid); running {
  1060  			l := c.Logger()
  1061  			l.Error("Failed to stop container so stopping dangling shim")
  1062  			if err := stopShim(c.process.Pid); err != nil {
  1063  				l.WithError(err).Warn("failed to stop shim")
  1064  			}
  1065  		}
  1066  
  1067  	}()
  1068  
  1069  	// Here we expect that stop() has been called because the container
  1070  	// process returned or because it received a signal. In case of a
  1071  	// signal, we want to give it some time to end the container process.
  1072  	// However, if the signal didn't reach its goal, the caller still
  1073  	// expects this container to be stopped, that's why we should not
  1074  	// return an error, but instead try to kill it forcefully.
  1075  	if err := waitForShim(c.process.Pid); err != nil {
  1076  		// Force the container to be killed.
  1077  		if err := c.kill(syscall.SIGKILL, true); err != nil && !force {
  1078  			return err
  1079  		}
  1080  
  1081  		// Wait for the end of container process. We expect this call
  1082  		// to succeed. Indeed, we have already given a second chance
  1083  		// to the container by trying to kill it with SIGKILL, there
  1084  		// is no reason to try to go further if we got an error.
  1085  		if err := waitForShim(c.process.Pid); err != nil && !force {
  1086  			return err
  1087  		}
  1088  	}
  1089  
  1090  	// Force the container to be killed. For most of the cases, this
  1091  	// should not matter and it should return an error that will be
  1092  	// ignored.
  1093  	// But for the specific case where the shim has been SIGKILL'ed,
  1094  	// the container is still running inside the VM. And this is why
  1095  	// this signal will ensure the container will get killed to match
  1096  	// the state of the shim. This will allow the following call to
  1097  	// stopContainer() to succeed in such particular case.
  1098  	c.kill(syscall.SIGKILL, true)
  1099  
  1100  	// Since the agent supports MultiWaitProcess, it's better to wait for the
  1101  	// process here to make sure it has exited before issuing stopContainer;
  1102  	// otherwise the RemoveContainerRequest in it will fail if the process
  1103  	// hasn't exited.
  1104  	c.sandbox.agent.waitProcess(c, c.id)
  1105  
  1106  	defer func() {
  1107  		// Save device and drive data.
  1108  		// TODO: can we merge this saving with setContainerState()?
  1109  		if err := c.sandbox.Save(); err != nil {
  1110  			c.Logger().WithError(err).Info("save container state failed")
  1111  		}
  1112  	}()
  1113  
  1114  	if err := c.sandbox.agent.stopContainer(c.sandbox, *c); err != nil && !force {
  1115  		return err
  1116  	}
  1117  
  1118  	if err := c.unmountHostMounts(); err != nil && !force {
  1119  		return err
  1120  	}
  1121  
  1122  	if err := bindUnmountContainerRootfs(c.ctx, kataHostSharedDir(), c.sandbox.id, c.id); err != nil && !force {
  1123  		return err
  1124  	}
  1125  
  1126  	if err := c.detachDevices(); err != nil && !force {
  1127  		return err
  1128  	}
  1129  
  1130  	if err := c.removeDrive(); err != nil && !force {
  1131  		return err
  1132  	}
  1133  
  1134  	shareDir := filepath.Join(kataHostSharedDir(), c.sandbox.id, c.id)
  1135  	if err := syscall.Rmdir(shareDir); err != nil {
  1136  		c.Logger().WithError(err).WithField("share-dir", shareDir).Warn("Could not remove container share dir")
  1137  	}
  1138  
  1139  	// The container was killed by force; it MUST change its state as soon as
  1140  	// possible, just in case any of these operations fails and leaves the
  1141  	// container in a bad state.
  1142  	if err := c.setContainerState(types.StateStopped); err != nil {
  1143  		return err
  1144  	}
  1145  
  1146  	return nil
  1147  }
  1148  
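        // enter executes an additional command inside the container, which must be ready
        // or running, and returns the resulting process.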
  1149  func (c *Container) enter(cmd types.Cmd) (*Process, error) {
  1150  	if err := c.checkSandboxRunning("enter"); err != nil {
  1151  		return nil, err
  1152  	}
  1153  
  1154  	if c.state.State != types.StateReady &&
  1155  		c.state.State != types.StateRunning {
  1156  		return nil, fmt.Errorf("Container not ready or running, " +
  1157  			"impossible to enter")
  1158  	}
  1159  
  1160  	process, err := c.sandbox.agent.exec(c.sandbox, *c, cmd)
  1161  	if err != nil {
  1162  		return nil, err
  1163  	}
  1164  
  1165  	return process, nil
  1166  }
  1167  
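        // wait blocks until the given process exits and returns its exit code.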
  1168  func (c *Container) wait(processID string) (int32, error) {
  1169  	if c.state.State != types.StateReady &&
  1170  		c.state.State != types.StateRunning {
  1171  		return 0, fmt.Errorf("Container not ready or running, " +
  1172  			"impossible to wait")
  1173  	}
  1174  
  1175  	return c.sandbox.agent.waitProcess(c, processID)
  1176  }
  1177  
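        // kill sends the given signal to the container process; when all is true, the
        // signal is sent to every process in the container.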
  1178  func (c *Container) kill(signal syscall.Signal, all bool) error {
  1179  	return c.signalProcess(c.process.Token, signal, all)
  1180  }
  1181  
  1182  func (c *Container) signalProcess(processID string, signal syscall.Signal, all bool) error {
  1183  	if c.sandbox.state.State != types.StateReady && c.sandbox.state.State != types.StateRunning {
  1184  		return fmt.Errorf("Sandbox not ready or running, impossible to signal the container")
  1185  	}
  1186  
  1187  	if c.state.State != types.StateReady && c.state.State != types.StateRunning && c.state.State != types.StatePaused {
  1188  		return fmt.Errorf("Container not ready, running or paused, impossible to signal the container")
  1189  	}
  1190  
  1191  	return c.sandbox.agent.signalProcess(c, processID, signal, all)
  1192  }
  1193  
  1194  func (c *Container) winsizeProcess(processID string, height, width uint32) error {
  1195  	if c.state.State != types.StateReady && c.state.State != types.StateRunning {
  1196  		return fmt.Errorf("Container not ready or running, impossible to resize the container process terminal")
  1197  	}
  1198  
  1199  	return c.sandbox.agent.winsizeProcess(c, processID, height, width)
  1200  }
  1201  
  1202  func (c *Container) ioStream(processID string) (io.WriteCloser, io.Reader, io.Reader, error) {
  1203  	if c.state.State != types.StateReady && c.state.State != types.StateRunning {
  1204  		return nil, nil, nil, fmt.Errorf("Container not ready or running, impossible to get the IO stream")
  1205  	}
  1206  
  1207  	stream := newIOStream(c.sandbox, c, processID)
  1208  
  1209  	return stream.stdin(), stream.stdout(), stream.stderr(), nil
  1210  }
  1211  
  1212  func (c *Container) processList(options ProcessListOptions) (ProcessList, error) {
  1213  	if err := c.checkSandboxRunning("ps"); err != nil {
  1214  		return nil, err
  1215  	}
  1216  
  1217  	if c.state.State != types.StateRunning {
  1218  		return nil, fmt.Errorf("Container not running, impossible to list processes")
  1219  	}
  1220  
  1221  	return c.sandbox.agent.processListContainer(c.sandbox, *c, options)
  1222  }
  1223  
  1224  func (c *Container) stats() (*ContainerStats, error) {
  1225  	if err := c.checkSandboxRunning("stats"); err != nil {
  1226  		return nil, err
  1227  	}
  1228  	return c.sandbox.agent.statsContainer(c.sandbox, *c)
  1229  }
  1230  
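        // update applies new CPU and memory constraints to the container, propagating
        // them to the sandbox resources, the host cgroups (unless SandboxCgroupOnly is
        // set) and the agent.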
  1231  func (c *Container) update(resources specs.LinuxResources) error {
  1232  	if err := c.checkSandboxRunning("update"); err != nil {
  1233  		return err
  1234  	}
  1235  
  1236  	if state := c.state.State; !(state == types.StateRunning || state == types.StateReady) {
  1237  		return fmt.Errorf("Container(%s) not running or ready, impossible to update", state)
  1238  	}
  1239  
  1240  	if c.config.Resources.CPU == nil {
  1241  		c.config.Resources.CPU = &specs.LinuxCPU{}
  1242  	}
  1243  
  1244  	if cpu := resources.CPU; cpu != nil {
  1245  		if p := cpu.Period; p != nil && *p != 0 {
  1246  			c.config.Resources.CPU.Period = p
  1247  		}
  1248  		if q := cpu.Quota; q != nil && *q != 0 {
  1249  			c.config.Resources.CPU.Quota = q
  1250  		}
  1251  	}
  1252  
  1253  	if c.config.Resources.Memory == nil {
  1254  		c.config.Resources.Memory = &specs.LinuxMemory{}
  1255  	}
  1256  
  1257  	if mem := resources.Memory; mem != nil && mem.Limit != nil {
  1258  		c.config.Resources.Memory.Limit = mem.Limit
  1259  	}
  1260  
  1261  	if err := c.sandbox.updateResources(); err != nil {
  1262  		return err
  1263  	}
  1264  
  1265  	if !c.sandbox.config.SandboxCgroupOnly {
  1266  		if err := c.cgroupsUpdate(resources); err != nil {
  1267  			return err
  1268  		}
  1269  	}
  1270  
  1271  	return c.sandbox.agent.updateContainer(c.sandbox, *c, resources)
  1272  }
  1273  
  1274  func (c *Container) pause() error {
  1275  	if err := c.checkSandboxRunning("pause"); err != nil {
  1276  		return err
  1277  	}
  1278  
  1279  	if c.state.State != types.StateRunning {
  1280  		return fmt.Errorf("Container not running, impossible to pause")
  1281  	}
  1282  
  1283  	if err := c.sandbox.agent.pauseContainer(c.sandbox, *c); err != nil {
  1284  		return err
  1285  	}
  1286  
  1287  	return c.setContainerState(types.StatePaused)
  1288  }
  1289  
  1290  func (c *Container) resume() error {
  1291  	if err := c.checkSandboxRunning("resume"); err != nil {
  1292  		return err
  1293  	}
  1294  
  1295  	if c.state.State != types.StatePaused {
  1296  		return fmt.Errorf("Container not paused, impossible to resume")
  1297  	}
  1298  
  1299  	if err := c.sandbox.agent.resumeContainer(c.sandbox, *c); err != nil {
  1300  		return err
  1301  	}
  1302  
  1303  	return c.setContainerState(types.StateRunning)
  1304  }
  1305  
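        // hotplugDrive checks whether the container rootfs is backed by a device mapper
        // block device; if so, the underlying device is hotplugged into the VM and the
        // rootfs filesystem type is recorded in the container state.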
  1306  func (c *Container) hotplugDrive() error {
  1307  	var dev device
  1308  	var err error
  1309  
  1310  	// the container rootfs is block device backed and isn't mounted
  1311  	if !c.rootFs.Mounted {
  1312  		dev, err = getDeviceForPath(c.rootFs.Source)
  1313  		// there is no "rootfs" dir on block device backed rootfs
  1314  		c.rootfsSuffix = ""
  1315  	} else {
  1316  		dev, err = getDeviceForPath(c.rootFs.Target)
  1317  	}
  1318  
  1319  	if err == errMountPointNotFound {
  1320  		return nil
  1321  	}
  1322  
  1323  	if err != nil {
  1324  		return err
  1325  	}
  1326  
  1327  	c.Logger().WithFields(logrus.Fields{
  1328  		"device-major": dev.major,
  1329  		"device-minor": dev.minor,
  1330  		"mount-point":  dev.mountPoint,
  1331  	}).Info("device details")
  1332  
  1333  	isDM, err := checkStorageDriver(dev.major, dev.minor)
  1334  	if err != nil {
  1335  		return err
  1336  	}
  1337  
  1338  	if !isDM {
  1339  		return nil
  1340  	}
  1341  
  1342  	devicePath := c.rootFs.Source
  1343  	fsType := c.rootFs.Type
  1344  	if c.rootFs.Mounted {
  1345  		if dev.mountPoint == c.rootFs.Target {
  1346  			c.rootfsSuffix = ""
  1347  		}
  1348  		// If device mapper device, then fetch the full path of the device
  1349  		devicePath, fsType, err = utils.GetDevicePathAndFsType(dev.mountPoint)
  1350  		if err != nil {
  1351  			return err
  1352  		}
  1353  	}
  1354  
  1355  	devicePath, err = filepath.EvalSymlinks(devicePath)
  1356  	if err != nil {
  1357  		return err
  1358  	}
  1359  
  1360  	c.Logger().WithFields(logrus.Fields{
  1361  		"device-path": devicePath,
  1362  		"fs-type":     fsType,
  1363  	}).Info("Block device detected")
  1364  
  1365  	if err = c.plugDevice(devicePath); err != nil {
  1366  		return err
  1367  	}
  1368  
  1369  	return c.setStateFstype(fsType)
  1370  }
  1371  
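        // plugDevice creates and attaches a block device for the given device path when
        // block device support is available, recording it as the container rootfs block
        // device.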
  1372  func (c *Container) plugDevice(devicePath string) error {
  1373  	var stat unix.Stat_t
  1374  	if err := unix.Stat(devicePath, &stat); err != nil {
  1375  		return fmt.Errorf("stat %q failed: %v", devicePath, err)
  1376  	}
  1377  
  1378  	if c.checkBlockDeviceSupport() && stat.Mode&unix.S_IFBLK == unix.S_IFBLK {
  1379  		b, err := c.sandbox.devManager.NewDevice(config.DeviceInfo{
  1380  			HostPath:      devicePath,
  1381  			ContainerPath: filepath.Join(kataGuestSharedDir(), c.id),
  1382  			DevType:       "b",
  1383  			Major:         int64(unix.Major(stat.Rdev)),
  1384  			Minor:         int64(unix.Minor(stat.Rdev)),
  1385  		})
  1386  		if err != nil {
  1387  			return fmt.Errorf("device manager failed to create rootfs device for %q: %v", devicePath, err)
  1388  		}
  1389  
  1390  		c.state.BlockDeviceID = b.DeviceID()
  1391  
  1392  		// attach rootfs device
  1393  		if err := c.sandbox.devManager.AttachDevice(b.DeviceID(), c.sandbox); err != nil {
  1394  			return err
  1395  		}
  1396  	}
  1397  	return nil
  1398  }
  1399  
  1400  // isDriveUsed checks if a drive has been used for container rootfs
  1401  func (c *Container) isDriveUsed() bool {
  1402  	return c.state.Fstype != ""
  1403  }
  1404  
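        // removeDrive detaches and removes the block device used for the container
        // rootfs, if one was plugged.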
  1405  func (c *Container) removeDrive() (err error) {
  1406  	if c.isDriveUsed() {
  1407  		c.Logger().Info("unplugging block device")
  1408  
  1409  		devID := c.state.BlockDeviceID
  1410  		err := c.sandbox.devManager.DetachDevice(devID, c.sandbox)
  1411  		if err != nil && err != manager.ErrDeviceNotAttached {
  1412  			return err
  1413  		}
  1414  
  1415  		if err = c.sandbox.devManager.RemoveDevice(devID); err != nil {
  1416  			c.Logger().WithFields(logrus.Fields{
  1417  				"container": c.id,
  1418  				"device-id": devID,
  1419  			}).WithError(err).Error("remove device failed")
  1420  
  1421  			// ignore the device not exist error
  1422  			if err != manager.ErrDeviceNotExist {
  1423  				return err
  1424  			}
  1425  		}
  1426  	}
  1427  
  1428  	return nil
  1429  }
  1430  
  1431  func (c *Container) attachDevices(devices []ContainerDevice) error {
  1432  	// there's no need to do a rollback when an error happens,
  1433  	// because if attachDevices fails, container creation will fail too,
  1434  	// and rollbackFailingContainerCreation will do all the rollbacks
  1435  
  1436  	// since devices with large BAR space require delayed attachment,
  1437  	// the devices need to be split into two lists, normalAttachedDevs and delayAttachedDevs,
  1438  	// so c.devices is not used here. See issue https://github.com/kata-containers/runtime/issues/2460.
  1439  	for _, dev := range devices {
  1440  		if err := c.sandbox.devManager.AttachDevice(dev.ID, c.sandbox); err != nil {
  1441  			return err
  1442  		}
  1443  	}
  1444  	return nil
  1445  }
  1446  
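        // detachDevices detaches every device attached to the container and removes it
        // from the device manager.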
  1447  func (c *Container) detachDevices() error {
  1448  	for _, dev := range c.devices {
  1449  		err := c.sandbox.devManager.DetachDevice(dev.ID, c.sandbox)
  1450  		if err != nil && err != manager.ErrDeviceNotAttached {
  1451  			return err
  1452  		}
  1453  
  1454  		if err = c.sandbox.devManager.RemoveDevice(dev.ID); err != nil {
  1455  			c.Logger().WithFields(logrus.Fields{
  1456  				"container": c.id,
  1457  				"device-id": dev.ID,
  1458  			}).WithError(err).Error("remove device failed")
  1459  
  1460  			// ignore the device not exist error
  1461  			if err != manager.ErrDeviceNotExist {
  1462  				return err
  1463  			}
  1464  		}
  1465  	}
  1466  	return nil
  1467  }
  1468  
  1469  // cgroupsCreate creates cgroups on the host for the associated container
  1470  func (c *Container) cgroupsCreate() (err error) {
  1471  	spec := c.GetPatchedOCISpec()
  1472  	if spec == nil {
  1473  		return errorMissingOCISpec
  1474  	}
  1475  
  1476  	// https://github.com/kata-containers/runtime/issues/168
  1477  	resources := specs.LinuxResources{
  1478  		CPU: nil,
  1479  	}
  1480  
  1481  	if spec.Linux != nil && spec.Linux.Resources != nil {
  1482  		resources.CPU = validCPUResources(spec.Linux.Resources.CPU)
  1483  	}
  1484  
  1485  	c.state.CgroupPath, err = vccgroups.ValidCgroupPath(spec.Linux.CgroupsPath, c.sandbox.config.SystemdCgroup)
  1486  	if err != nil {
  1487  		return fmt.Errorf("Invalid cgroup path: %v", err)
  1488  	}
  1489  
  1490  	cgroup, err := cgroupsNewFunc(cgroups.V1,
  1491  		cgroups.StaticPath(c.state.CgroupPath), &resources)
  1492  	if err != nil {
  1493  		return fmt.Errorf("Could not create cgroup for %v: %v", c.state.CgroupPath, err)
  1494  	}
  1495  
  1496  	c.config.Resources = resources
  1497  
  1498  	// Add shim into cgroup
  1499  	if c.process.Pid > 0 {
  1500  		if err := cgroup.Add(cgroups.Process{Pid: c.process.Pid}); err != nil {
  1501  			return fmt.Errorf("Could not add PID %d to cgroup %v: %v", c.process.Pid, spec.Linux.CgroupsPath, err)
  1502  		}
  1503  	}
  1504  
  1505  	return nil
  1506  }
  1507  
  1508  // cgroupsDelete deletes the cgroups on the host for the associated container
  1509  func (c *Container) cgroupsDelete() error {
  1510  
  1511  	if c.state.CgroupPath == "" {
  1512  		c.Logger().Debug("container does not have host cgroups: nothing to delete")
  1513  		return nil
  1514  	}
  1515  
  1516  	cgroup, err := cgroupsLoadFunc(cgroups.V1,
  1517  		cgroups.StaticPath(c.state.CgroupPath))
  1518  
  1519  	if err == cgroups.ErrCgroupDeleted {
  1520  		// cgroup already deleted
  1521  		return nil
  1522  	}
  1523  
  1524  	if err != nil {
  1525  		return fmt.Errorf("Could not load container cgroup %v: %v", c.state.CgroupPath, err)
  1526  	}
  1527  
  1528  	// move running processes into the parent cgroup so this cgroup can be removed
  1529  	parent, err := parentCgroup(cgroups.V1, c.state.CgroupPath)
  1530  	if err != nil {
  1531  		// the parent cgroup doesn't exist, which means there are no processes running
  1532  		// and the container cgroup was already removed.
  1533  		c.Logger().WithError(err).Warn("Container cgroup doesn't exist")
  1534  		return nil
  1535  	}
  1536  
  1537  	if err := cgroup.MoveTo(parent); err != nil {
  1538  		// Don't fail, cgroup can be deleted
  1539  		c.Logger().WithError(err).Warn("Could not move container process into parent cgroup")
  1540  	}
  1541  
  1542  	if err := cgroup.Delete(); err != nil {
  1543  		return fmt.Errorf("Could not delete container cgroup path='%v': error='%v'", c.state.CgroupPath, err)
  1544  	}
  1545  
  1546  	return nil
  1547  }
  1548  
  1549  // cgroupsUpdate updates cgroups on the host for the associated container
  1550  func (c *Container) cgroupsUpdate(resources specs.LinuxResources) error {
  1551  
  1552  	if c.state.CgroupPath == "" {
  1553  		c.Logger().Debug("container does not have host cgroups: nothing to update")
  1554  		return nil
  1555  	}
  1556  	cgroup, err := cgroupsLoadFunc(cgroups.V1,
  1557  		cgroups.StaticPath(c.state.CgroupPath))
  1558  	if err != nil {
  1559  		return fmt.Errorf("Could not load cgroup %v: %v", c.state.CgroupPath, err)
  1560  	}
  1561  
  1562  	// Issue: https://github.com/kata-containers/runtime/issues/168
  1563  	r := specs.LinuxResources{
  1564  		CPU: validCPUResources(resources.CPU),
  1565  	}
  1566  
  1567  	// update cgroup
  1568  	if err := cgroup.Update(&r); err != nil {
  1569  		return fmt.Errorf("Could not update container cgroup path='%v': error='%v'", c.state.CgroupPath, err)
  1570  	}
  1571  
  1572  	// store new resources
  1573  	c.config.Resources = r
  1574  	if err := c.storeContainer(); err != nil {
  1575  		return err
  1576  	}
  1577  
  1578  	return nil
  1579  }