github.com/kata-containers/runtime@v0.0.0-20210505125100-04f29832a923/virtcontainers/sandbox.go

     1  // Copyright (c) 2016 Intel Corporation
     2  // Copyright (c) 2020 Adobe Inc.
     3  //
     4  // SPDX-License-Identifier: Apache-2.0
     5  //
     6  
     7  package virtcontainers
     8  
     9  import (
    10  	"context"
    11  	"fmt"
    12  	"io"
    13  	"math"
    14  	"net"
    15  	"os"
    16  	"strings"
    17  	"sync"
    18  	"syscall"
    19  
    20  	"github.com/containerd/cgroups"
    21  	"github.com/containernetworking/plugins/pkg/ns"
    22  	"github.com/opencontainers/runc/libcontainer/configs"
    23  	specs "github.com/opencontainers/runtime-spec/specs-go"
    24  	opentracing "github.com/opentracing/opentracing-go"
    25  	"github.com/pkg/errors"
    26  	"github.com/sirupsen/logrus"
    27  	"github.com/vishvananda/netlink"
    28  
    29  	"github.com/kata-containers/agent/protocols/grpc"
    30  	"github.com/kata-containers/runtime/virtcontainers/device/api"
    31  	"github.com/kata-containers/runtime/virtcontainers/device/config"
    32  	"github.com/kata-containers/runtime/virtcontainers/device/drivers"
    33  	deviceManager "github.com/kata-containers/runtime/virtcontainers/device/manager"
    34  	exp "github.com/kata-containers/runtime/virtcontainers/experimental"
    35  	"github.com/kata-containers/runtime/virtcontainers/persist"
    36  	persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api"
    37  	"github.com/kata-containers/runtime/virtcontainers/pkg/annotations"
    38  	vccgroups "github.com/kata-containers/runtime/virtcontainers/pkg/cgroups"
    39  	"github.com/kata-containers/runtime/virtcontainers/pkg/compatoci"
    40  	"github.com/kata-containers/runtime/virtcontainers/pkg/rootless"
    41  	vcTypes "github.com/kata-containers/runtime/virtcontainers/pkg/types"
    42  	"github.com/kata-containers/runtime/virtcontainers/store"
    43  	"github.com/kata-containers/runtime/virtcontainers/types"
    44  	"github.com/kata-containers/runtime/virtcontainers/utils"
    45  	"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
    46  )
    47  
    48  const (
     49  	// vmStartTimeout represents the time in seconds a sandbox can wait before
     50  	// considering the VM start operation failed.
    51  	vmStartTimeout = 10
    52  
    53  	// DirMode is the permission bits used for creating a directory
    54  	DirMode = os.FileMode(0750) | os.ModeDir
    55  )
    56  
    57  // SandboxStatus describes a sandbox status.
    58  type SandboxStatus struct {
    59  	ID               string
    60  	State            types.SandboxState
    61  	Hypervisor       HypervisorType
    62  	HypervisorConfig HypervisorConfig
    63  	Agent            AgentType
    64  	ContainersStatus []ContainerStatus
    65  
    66  	// Annotations allow clients to store arbitrary values,
    67  	// for example to add additional status values required
    68  	// to support particular specifications.
    69  	Annotations map[string]string
    70  }
    71  
    72  // SandboxStats describes a sandbox's stats
    73  type SandboxStats struct {
    74  	CgroupStats CgroupStats
    75  	Cpus        int
    76  }
    77  
    78  // SandboxConfig is a Sandbox configuration.
    79  type SandboxConfig struct {
    80  	ID string
    81  
    82  	Hostname string
    83  
    84  	HypervisorType   HypervisorType
    85  	HypervisorConfig HypervisorConfig
    86  
    87  	AgentType   AgentType
    88  	AgentConfig interface{}
    89  
    90  	ProxyType   ProxyType
    91  	ProxyConfig ProxyConfig
    92  
    93  	ShimType   ShimType
    94  	ShimConfig interface{}
    95  
    96  	NetworkConfig NetworkConfig
    97  
    98  	// Volumes is a list of shared volumes between the host and the Sandbox.
    99  	Volumes []types.Volume
   100  
   101  	// Containers describe the list of containers within a Sandbox.
   102  	// This list can be empty and populated by adding containers
   103  	// to the Sandbox a posteriori.
   104  	//TODO: this should be a map to avoid duplicated containers
   105  	Containers []ContainerConfig
   106  
   107  	// Annotations keys must be unique strings and must be name-spaced
   108  	// with e.g. reverse domain notation (org.clearlinux.key).
   109  	Annotations map[string]string
   110  
   111  	ShmSize uint64
   112  
   113  	// SharePidNs sets all containers to share the same sandbox level pid namespace.
   114  	SharePidNs bool
   115  
    116  	// Stateful keeps sandbox resources in memory across APIs. Users will be responsible
   117  	// for calling Release() to release the memory resources.
   118  	Stateful bool
   119  
   120  	// SystemdCgroup enables systemd cgroup support
   121  	SystemdCgroup bool
   122  
    123  	// SandboxCgroupOnly enables cgroup only at the pod level in the host
   124  	SandboxCgroupOnly bool
   125  
   126  	// EnableAgentPidNs allows containers to share pid namespace with the agent
   127  	EnableAgentPidNs bool
   128  
   129  	DisableGuestSeccomp bool
   130  
   131  	// Experimental features enabled
   132  	Experimental []exp.Feature
   133  
   134  	// Cgroups specifies specific cgroup settings for the various subsystems that the container is
   135  	// placed into to limit the resources the container has available
   136  	Cgroups *configs.Cgroup
   137  }
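
         // Illustrative only: a minimal SandboxConfig as a caller might assemble it
         // before creating a sandbox. The paths and values below are placeholders,
         // and KataContainersAgent is assumed to be the agent type constant defined
         // elsewhere in this package; neither is a default exported by this file.
         //
         //	cfg := SandboxConfig{
         //		ID:             "sandbox-0",
         //		HypervisorType: QemuHypervisor,
         //		HypervisorConfig: HypervisorConfig{
         //			KernelPath: "/usr/share/kata-containers/vmlinuz", // placeholder path
         //			ImagePath:  "/usr/share/kata-containers/kata-containers.img",
         //		},
         //		AgentType:   KataContainersAgent,
         //		Annotations: map[string]string{"org.example.key": "value"},
         //	}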
   138  
   139  func (s *Sandbox) trace(name string) (opentracing.Span, context.Context) {
   140  	if s.ctx == nil {
   141  		s.Logger().WithField("type", "bug").Error("trace called before context set")
   142  		s.ctx = context.Background()
   143  	}
   144  
   145  	span, ctx := opentracing.StartSpanFromContext(s.ctx, name)
   146  
   147  	span.SetTag("subsystem", "sandbox")
   148  
   149  	return span, ctx
   150  }
   151  
   152  func (s *Sandbox) startProxy() error {
   153  
    154  	// If the proxy is of type KataBuiltInProxyType, it needs to restart the proxy
    155  	// to watch the guest console if it hasn't been watched yet.
   156  	if s.agent == nil {
   157  		return fmt.Errorf("sandbox %s missed agent pointer", s.ID())
   158  	}
   159  
   160  	return s.agent.startProxy(s)
   161  }
   162  
    163  // valid checks that the sandbox configuration is valid. An unknown hypervisor type is not an error: it falls back to QemuHypervisor.
   164  func (sandboxConfig *SandboxConfig) valid() bool {
   165  	if sandboxConfig.ID == "" {
   166  		return false
   167  	}
   168  
   169  	if _, err := newHypervisor(sandboxConfig.HypervisorType); err != nil {
   170  		sandboxConfig.HypervisorType = QemuHypervisor
   171  	}
   172  
   173  	// validate experimental features
   174  	for _, f := range sandboxConfig.Experimental {
   175  		if exp.Get(f.Name) == nil {
   176  			return false
   177  		}
   178  	}
   179  	return true
   180  }
   181  
   182  // Sandbox is composed of a set of containers and a runtime environment.
   183  // A Sandbox can be created, deleted, started, paused, stopped, listed, entered, and restored.
   184  type Sandbox struct {
   185  	id string
   186  
   187  	sync.Mutex
   188  	factory    Factory
   189  	hypervisor hypervisor
   190  	agent      agent
   191  	store      *store.VCStore
    192  	// newStore is used to replace VCStore step by step
   193  	newStore persistapi.PersistDriver
   194  
   195  	network Network
   196  	monitor *monitor
   197  
   198  	config *SandboxConfig
   199  
   200  	devManager api.DeviceManager
   201  
   202  	volumes []types.Volume
   203  
   204  	containers map[string]*Container
   205  
   206  	state types.SandboxState
   207  
   208  	networkNS NetworkNamespace
   209  
   210  	annotationsLock *sync.RWMutex
   211  
   212  	wg *sync.WaitGroup
   213  
   214  	shmSize           uint64
   215  	sharePidNs        bool
   216  	stateful          bool
   217  	seccompSupported  bool
   218  	disableVMShutdown bool
   219  
   220  	cgroupMgr *vccgroups.Manager
   221  
   222  	ctx context.Context
   223  }
   224  
   225  // ID returns the sandbox identifier string.
   226  func (s *Sandbox) ID() string {
   227  	return s.id
   228  }
   229  
   230  // Logger returns a logrus logger appropriate for logging Sandbox messages
   231  func (s *Sandbox) Logger() *logrus.Entry {
   232  	return virtLog.WithFields(logrus.Fields{
   233  		"subsystem": "sandbox",
   234  		"sandbox":   s.id,
   235  	})
   236  }
   237  
   238  // Annotations returns any annotation that a user could have stored through the sandbox.
   239  func (s *Sandbox) Annotations(key string) (string, error) {
   240  	s.annotationsLock.RLock()
   241  	defer s.annotationsLock.RUnlock()
   242  
   243  	value, exist := s.config.Annotations[key]
   244  	if !exist {
   245  		return "", fmt.Errorf("Annotations key %s does not exist", key)
   246  	}
   247  
   248  	return value, nil
   249  }
   250  
    251  // SetAnnotations sets or adds annotations
   252  func (s *Sandbox) SetAnnotations(annotations map[string]string) error {
   253  	s.annotationsLock.Lock()
   254  	defer s.annotationsLock.Unlock()
   255  
   256  	for k, v := range annotations {
   257  		s.config.Annotations[k] = v
   258  	}
   259  	return nil
   260  }
   261  
   262  // GetAnnotations returns sandbox's annotations
   263  func (s *Sandbox) GetAnnotations() map[string]string {
   264  	s.annotationsLock.RLock()
   265  	defer s.annotationsLock.RUnlock()
   266  
   267  	return s.config.Annotations
   268  }
   269  
   270  // GetNetNs returns the network namespace of the current sandbox.
   271  func (s *Sandbox) GetNetNs() string {
   272  	return s.networkNS.NetNsPath
   273  }
   274  
   275  // GetAllContainers returns all containers.
   276  func (s *Sandbox) GetAllContainers() []VCContainer {
   277  	ifa := make([]VCContainer, len(s.containers))
   278  
   279  	i := 0
   280  	for _, v := range s.containers {
   281  		ifa[i] = v
   282  		i++
   283  	}
   284  
   285  	return ifa
   286  }
   287  
   288  // GetContainer returns the container named by the containerID.
   289  func (s *Sandbox) GetContainer(containerID string) VCContainer {
   290  	if c, ok := s.containers[containerID]; ok {
   291  		return c
   292  	}
   293  	return nil
   294  }
   295  
   296  // Release closes the agent connection and removes sandbox from internal list.
   297  func (s *Sandbox) Release() error {
   298  	s.Logger().Info("release sandbox")
   299  	globalSandboxList.removeSandbox(s.id)
   300  	if s.monitor != nil {
   301  		s.monitor.stop()
   302  	}
   303  	s.hypervisor.disconnect()
   304  	return s.agent.disconnect()
   305  }
   306  
   307  func (s *Sandbox) releaseStatelessSandbox() error {
   308  	if s.stateful {
   309  		return nil
   310  	}
   311  
   312  	return s.Release()
   313  }
   314  
   315  // Status gets the status of the sandbox
   316  // TODO: update container status properly, see kata-containers/runtime#253
   317  func (s *Sandbox) Status() SandboxStatus {
   318  	var contStatusList []ContainerStatus
   319  	for _, c := range s.containers {
   320  		rootfs := c.config.RootFs.Source
   321  		if c.config.RootFs.Mounted {
   322  			rootfs = c.config.RootFs.Target
   323  		}
   324  
   325  		contStatusList = append(contStatusList, ContainerStatus{
   326  			ID:          c.id,
   327  			State:       c.state,
   328  			PID:         c.process.Pid,
   329  			StartTime:   c.process.StartTime,
   330  			RootFs:      rootfs,
   331  			Annotations: c.config.Annotations,
   332  		})
   333  	}
   334  
   335  	return SandboxStatus{
   336  		ID:               s.id,
   337  		State:            s.state,
   338  		Hypervisor:       s.config.HypervisorType,
   339  		HypervisorConfig: s.config.HypervisorConfig,
   340  		Agent:            s.config.AgentType,
   341  		ContainersStatus: contStatusList,
   342  		Annotations:      s.config.Annotations,
   343  	}
   344  }
   345  
    346  // Monitor returns an error channel for a watcher to watch
   347  func (s *Sandbox) Monitor() (chan error, error) {
   348  	if s.state.State != types.StateRunning {
   349  		return nil, fmt.Errorf("Sandbox is not running")
   350  	}
   351  
   352  	s.Lock()
   353  	if s.monitor == nil {
   354  		s.monitor = newMonitor(s)
   355  	}
   356  	s.Unlock()
   357  
   358  	return s.monitor.newWatcher()
   359  }
   360  
    361  // WaitProcess waits on a container process and returns its exit code
   362  func (s *Sandbox) WaitProcess(containerID, processID string) (int32, error) {
   363  	if s.state.State != types.StateRunning {
   364  		return 0, fmt.Errorf("Sandbox not running")
   365  	}
   366  
   367  	c, err := s.findContainer(containerID)
   368  	if err != nil {
   369  		return 0, err
   370  	}
   371  
   372  	return c.wait(processID)
   373  }
   374  
   375  // SignalProcess sends a signal to a process of a container when all is false.
   376  // When all is true, it sends the signal to all processes of a container.
   377  func (s *Sandbox) SignalProcess(containerID, processID string, signal syscall.Signal, all bool) error {
   378  	if s.state.State != types.StateRunning {
   379  		return fmt.Errorf("Sandbox not running")
   380  	}
   381  
   382  	c, err := s.findContainer(containerID)
   383  	if err != nil {
   384  		return err
   385  	}
   386  
   387  	return c.signalProcess(processID, signal, all)
   388  }
   389  
   390  // WinsizeProcess resizes the tty window of a process
   391  func (s *Sandbox) WinsizeProcess(containerID, processID string, height, width uint32) error {
   392  	if s.state.State != types.StateRunning {
   393  		return fmt.Errorf("Sandbox not running")
   394  	}
   395  
   396  	c, err := s.findContainer(containerID)
   397  	if err != nil {
   398  		return err
   399  	}
   400  
   401  	return c.winsizeProcess(processID, height, width)
   402  }
   403  
   404  // IOStream returns stdin writer, stdout reader and stderr reader of a process
   405  func (s *Sandbox) IOStream(containerID, processID string) (io.WriteCloser, io.Reader, io.Reader, error) {
   406  	if s.state.State != types.StateRunning {
   407  		return nil, nil, nil, fmt.Errorf("Sandbox not running")
   408  	}
   409  
   410  	c, err := s.findContainer(containerID)
   411  	if err != nil {
   412  		return nil, nil, nil, err
   413  	}
   414  
   415  	return c.ioStream(processID)
   416  }
   417  
   418  func createAssets(ctx context.Context, sandboxConfig *SandboxConfig) error {
   419  	span, _ := trace(ctx, "createAssets")
   420  	defer span.Finish()
   421  
   422  	for _, name := range types.AssetTypes() {
   423  		a, err := types.NewAsset(sandboxConfig.Annotations, name)
   424  		if err != nil {
   425  			return err
   426  		}
   427  
   428  		if err := sandboxConfig.HypervisorConfig.addCustomAsset(a); err != nil {
   429  			return err
   430  		}
   431  	}
   432  
   433  	_, imageErr := sandboxConfig.HypervisorConfig.assetPath(types.ImageAsset)
   434  	_, initrdErr := sandboxConfig.HypervisorConfig.assetPath(types.InitrdAsset)
   435  
   436  	if imageErr != nil && initrdErr != nil {
   437  		return fmt.Errorf("%s and %s cannot be both set", types.ImageAsset, types.InitrdAsset)
   438  	}
   439  
   440  	return nil
   441  }
   442  
   443  func (s *Sandbox) getAndStoreGuestDetails() error {
   444  	guestDetailRes, err := s.agent.getGuestDetails(&grpc.GuestDetailsRequest{
   445  		MemBlockSize:    true,
   446  		MemHotplugProbe: true,
   447  	})
   448  	if err != nil {
   449  		return err
   450  	}
   451  
   452  	if guestDetailRes != nil {
   453  		s.state.GuestMemoryBlockSizeMB = uint32(guestDetailRes.MemBlockSizeBytes >> 20)
   454  		if guestDetailRes.AgentDetails != nil {
   455  			s.seccompSupported = guestDetailRes.AgentDetails.SupportsSeccomp
   456  		}
   457  		s.state.GuestMemoryHotplugProbe = guestDetailRes.SupportMemHotplugProbe
   458  	}
   459  
   460  	return nil
   461  }
   462  
   463  // createSandbox creates a sandbox from a sandbox description, the containers list, the hypervisor
   464  // and the agent passed through the Config structure.
   465  // It will create and store the sandbox structure, and then ask the hypervisor
    466  // to physically create that sandbox, i.e. start a VM for that sandbox so it can eventually
   467  // be started.
   468  func createSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factory) (*Sandbox, error) {
   469  	span, ctx := trace(ctx, "createSandbox")
   470  	defer span.Finish()
   471  
   472  	if err := createAssets(ctx, &sandboxConfig); err != nil {
   473  		return nil, err
   474  	}
   475  
   476  	s, err := newSandbox(ctx, sandboxConfig, factory)
   477  	if err != nil {
   478  		return nil, err
   479  	}
   480  
   481  	if len(s.config.Experimental) != 0 {
   482  		s.Logger().WithField("features", s.config.Experimental).Infof("Enable experimental features")
   483  	}
   484  
   485  	// Sandbox state has been loaded from storage.
    486  	// If the State is not empty, this is a re-creation, i.e.
   487  	// we don't need to talk to the guest's agent, but only
   488  	// want to create the sandbox and its containers in memory.
   489  	if s.state.State != "" {
   490  		return s, nil
   491  	}
   492  
   493  	// Below code path is called only during create, because of earlier check.
   494  	if err := s.agent.createSandbox(s); err != nil {
   495  		return nil, err
   496  	}
   497  
   498  	// Set sandbox state
   499  	if err := s.setSandboxState(types.StateReady); err != nil {
   500  		return nil, err
   501  	}
   502  
   503  	return s, nil
   504  }
   505  
   506  func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factory) (sb *Sandbox, retErr error) {
   507  	span, ctx := trace(ctx, "newSandbox")
   508  	defer span.Finish()
   509  
   510  	if !sandboxConfig.valid() {
   511  		return nil, fmt.Errorf("Invalid sandbox configuration")
   512  	}
   513  
   514  	agent := newAgent(sandboxConfig.AgentType)
   515  
   516  	hypervisor, err := newHypervisor(sandboxConfig.HypervisorType)
   517  	if err != nil {
   518  		return nil, err
   519  	}
   520  
   521  	s := &Sandbox{
   522  		id:              sandboxConfig.ID,
   523  		factory:         factory,
   524  		hypervisor:      hypervisor,
   525  		agent:           agent,
   526  		config:          &sandboxConfig,
   527  		volumes:         sandboxConfig.Volumes,
   528  		containers:      map[string]*Container{},
   529  		state:           types.SandboxState{BlockIndexMap: make(map[int]struct{})},
   530  		annotationsLock: &sync.RWMutex{},
   531  		wg:              &sync.WaitGroup{},
   532  		shmSize:         sandboxConfig.ShmSize,
   533  		sharePidNs:      sandboxConfig.SharePidNs,
   534  		stateful:        sandboxConfig.Stateful,
   535  		networkNS:       NetworkNamespace{NetNsPath: sandboxConfig.NetworkConfig.NetNSPath},
   536  		ctx:             ctx,
   537  	}
   538  
   539  	if s.newStore, err = persist.GetDriver(); err != nil || s.newStore == nil {
   540  		return nil, fmt.Errorf("failed to get fs persist driver: %v", err)
   541  	}
   542  
   543  	if err = globalSandboxList.addSandbox(s); err != nil {
   544  		s.newStore.Destroy(s.id)
   545  		return nil, err
   546  	}
   547  
   548  	defer func() {
   549  		if retErr != nil {
   550  			s.Logger().WithError(retErr).WithField("sandboxid", s.id).Error("Create new sandbox failed")
   551  			globalSandboxList.removeSandbox(s.id)
   552  			s.newStore.Destroy(s.id)
   553  		}
   554  	}()
   555  
   556  	spec := s.GetPatchedOCISpec()
   557  	if spec != nil && spec.Process.SelinuxLabel != "" {
   558  		sandboxConfig.HypervisorConfig.SELinuxProcessLabel = spec.Process.SelinuxLabel
   559  	}
   560  
   561  	if useOldStore(ctx) {
   562  		vcStore, err := store.NewVCSandboxStore(ctx, s.id)
   563  		if err != nil {
   564  			return nil, err
   565  		}
   566  
   567  		s.store = vcStore
   568  
   569  		// Fetch sandbox network to be able to access it from the sandbox structure.
   570  		var networkNS NetworkNamespace
   571  		if err = s.store.Load(store.Network, &networkNS); err == nil {
   572  			s.networkNS = networkNS
   573  		}
   574  
   575  		devices, err := s.store.LoadDevices()
   576  		if err != nil {
   577  			s.Logger().WithError(err).WithField("sandboxid", s.id).Warning("load sandbox devices failed")
   578  		}
   579  		s.devManager = deviceManager.NewDeviceManager(sandboxConfig.HypervisorConfig.BlockDeviceDriver,
   580  			sandboxConfig.HypervisorConfig.EnableVhostUserStore,
   581  			sandboxConfig.HypervisorConfig.VhostUserStorePath, devices)
   582  
    583  		// Load sandbox state. The hypervisor.createSandbox call may need to access the state.
   584  		state, err := s.store.LoadState()
   585  		if err == nil {
   586  			s.state = state
   587  		}
   588  
   589  		if err = s.hypervisor.createSandbox(ctx, s.id, s.networkNS, &sandboxConfig.HypervisorConfig, s.stateful); err != nil {
   590  			return nil, err
   591  		}
   592  	} else {
   593  		s.devManager = deviceManager.NewDeviceManager(sandboxConfig.HypervisorConfig.BlockDeviceDriver,
   594  			sandboxConfig.HypervisorConfig.EnableVhostUserStore,
   595  			sandboxConfig.HypervisorConfig.VhostUserStorePath, nil)
   596  
   597  		// Ignore the error. Restore can fail for a new sandbox
   598  		if err := s.Restore(); err != nil {
   599  			s.Logger().WithError(err).Debug("restore sandbox failed")
   600  		}
   601  
   602  		// new store doesn't require hypervisor to be stored immediately
   603  		if err = s.hypervisor.createSandbox(ctx, s.id, s.networkNS, &sandboxConfig.HypervisorConfig, s.stateful); err != nil {
   604  			return nil, err
   605  		}
   606  	}
   607  
   608  	agentConfig, err := newAgentConfig(sandboxConfig.AgentType, sandboxConfig.AgentConfig)
   609  	if err != nil {
   610  		return nil, err
   611  	}
   612  
   613  	if s.disableVMShutdown, err = s.agent.init(ctx, s, agentConfig); err != nil {
   614  		return nil, err
   615  	}
   616  
   617  	return s, nil
   618  }
   619  
   620  func (s *Sandbox) createCgroupManager() error {
   621  	var err error
   622  	cgroupPath := ""
   623  
   624  	// Do not change current cgroup configuration.
   625  	// Create a spec without constraints
   626  	resources := specs.LinuxResources{}
   627  
   628  	if s.config == nil {
   629  		return fmt.Errorf("Could not create cgroup manager: empty sandbox configuration")
   630  	}
   631  
   632  	spec := s.GetPatchedOCISpec()
   633  	if spec != nil && spec.Linux != nil {
   634  		cgroupPath = spec.Linux.CgroupsPath
   635  
   636  		// Kata relies on the cgroup parent created and configured by the container
    637  		// engine by default. The exceptions are the devices whitelist and the sandbox-level
   638  		// CPUSet.
   639  		if spec.Linux.Resources != nil {
   640  			resources.Devices = spec.Linux.Resources.Devices
   641  
   642  			if spec.Linux.Resources.CPU != nil {
   643  				resources.CPU = &specs.LinuxCPU{
   644  					Cpus: spec.Linux.Resources.CPU.Cpus,
   645  				}
   646  			}
   647  		}
   648  
   649  		//TODO: in Docker or Podman use case, it is reasonable to set a constraint. Need to add a flag
   650  		// to allow users to configure Kata to constrain CPUs and Memory in this alternative
   651  		// scenario. See https://github.com/kata-containers/runtime/issues/2811
   652  	}
   653  
   654  	if s.devManager != nil {
   655  		for _, d := range s.devManager.GetAllDevices() {
   656  			dev, err := vccgroups.DeviceToLinuxDevice(d.GetHostPath())
   657  			if err != nil {
   658  				s.Logger().WithError(err).WithField("device", d.GetHostPath()).Warn("Could not add device to sandbox resources")
   659  				continue
   660  			}
   661  			resources.Devices = append(resources.Devices, dev)
   662  		}
   663  	}
   664  
    665  	// Create the cgroup manager so it can be used later
    666  	// to create or destroy cgroups
   667  	if s.cgroupMgr, err = vccgroups.New(
   668  		&vccgroups.Config{
   669  			Cgroups:     s.config.Cgroups,
   670  			CgroupPaths: s.state.CgroupPaths,
   671  			Resources:   resources,
   672  			CgroupPath:  cgroupPath,
   673  		},
   674  	); err != nil {
   675  		return err
   676  	}
   677  
   678  	return nil
   679  }
   680  
   681  // storeSandbox stores a sandbox config.
   682  func (s *Sandbox) storeSandbox() error {
   683  	span, _ := s.trace("storeSandbox")
   684  	defer span.Finish()
   685  
   686  	// flush data to storage
   687  	if err := s.Save(); err != nil {
   688  		return err
   689  	}
   690  	return nil
   691  }
   692  
   693  func rLockSandbox(sandboxID string) (func() error, error) {
   694  	store, err := persist.GetDriver()
   695  	if err != nil {
   696  		return nil, fmt.Errorf("failed to get fs persist driver: %v", err)
   697  	}
   698  
   699  	return store.Lock(sandboxID, false)
   700  }
   701  
   702  func rwLockSandbox(sandboxID string) (func() error, error) {
   703  	store, err := persist.GetDriver()
   704  	if err != nil {
   705  		return nil, fmt.Errorf("failed to get fs persist driver: %v", err)
   706  	}
   707  
   708  	return store.Lock(sandboxID, true)
   709  }
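
         // Usage sketch (assumption, not code from this file): the returned function
         // releases the lock, so callers typically defer it, e.g.
         //
         //	unlock, err := rwLockSandbox(sandboxID)
         //	if err != nil {
         //		return err
         //	}
         //	defer unlock()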
   710  
   711  // fetchSandbox fetches a sandbox config from a sandbox ID and returns a sandbox.
   712  func fetchSandbox(ctx context.Context, sandboxID string) (sandbox *Sandbox, err error) {
   713  	virtLog.Info("fetch sandbox")
   714  	if sandboxID == "" {
   715  		return nil, vcTypes.ErrNeedSandboxID
   716  	}
   717  
   718  	sandbox, err = globalSandboxList.lookupSandbox(sandboxID)
   719  	if sandbox != nil && err == nil {
   720  		return sandbox, err
   721  	}
   722  
   723  	var config SandboxConfig
   724  
    725  	// Try to load the sandbox config from the new store first.
   726  	c, err := loadSandboxConfig(sandboxID)
   727  	if err != nil {
   728  		virtLog.Warningf("failed to get sandbox config from new store: %v", err)
   729  		// If we failed to load sandbox config from new store, try again with old store.
   730  		c, ctx, err = loadSandboxConfigFromOldStore(ctx, sandboxID)
   731  		if err != nil {
   732  			virtLog.Warningf("failed to get sandbox config from old store: %v", err)
   733  			return nil, err
   734  		}
   735  	}
   736  	config = *c
   737  
   738  	if useOldStore(ctx) {
   739  		virtLog.Infof("Warning: old store has been deprecated.")
   740  	}
    741  	// fetchSandbox is not supposed to create a new sandbox VM.
   742  	sandbox, err = createSandbox(ctx, config, nil)
   743  	if err != nil {
   744  		return nil, fmt.Errorf("failed to create sandbox with config %+v: %v", config, err)
   745  	}
   746  
   747  	if sandbox.config.SandboxCgroupOnly {
   748  		if err := sandbox.createCgroupManager(); err != nil {
   749  			return nil, err
   750  		}
   751  	}
   752  
   753  	// This sandbox already exists, we don't need to recreate the containers in the guest.
   754  	// We only need to fetch the containers from storage and create the container structs.
   755  	if err := sandbox.fetchContainers(); err != nil {
   756  		return nil, err
   757  	}
   758  
   759  	return sandbox, nil
   760  }
   761  
   762  // findContainer returns a container from the containers list held by the
   763  // sandbox structure, based on a container ID.
   764  func (s *Sandbox) findContainer(containerID string) (*Container, error) {
   765  	if s == nil {
   766  		return nil, vcTypes.ErrNeedSandbox
   767  	}
   768  
   769  	if containerID == "" {
   770  		return nil, vcTypes.ErrNeedContainerID
   771  	}
   772  
   773  	if c, ok := s.containers[containerID]; ok {
   774  		return c, nil
   775  	}
   776  
   777  	return nil, errors.Wrapf(vcTypes.ErrNoSuchContainer, "Could not find the container %q from the sandbox %q containers list",
   778  		containerID, s.id)
   779  }
   780  
   781  // removeContainer removes a container from the containers list held by the
   782  // sandbox structure, based on a container ID.
   783  func (s *Sandbox) removeContainer(containerID string) error {
   784  	if s == nil {
   785  		return vcTypes.ErrNeedSandbox
   786  	}
   787  
   788  	if containerID == "" {
   789  		return vcTypes.ErrNeedContainerID
   790  	}
   791  
   792  	if _, ok := s.containers[containerID]; !ok {
   793  		return errors.Wrapf(vcTypes.ErrNoSuchContainer, "Could not remove the container %q from the sandbox %q containers list",
   794  			containerID, s.id)
   795  	}
   796  
   797  	delete(s.containers, containerID)
   798  
   799  	return nil
   800  }
   801  
   802  // Delete deletes an already created sandbox.
   803  // The VM in which the sandbox is running will be shut down.
   804  func (s *Sandbox) Delete() error {
   805  	if s.state.State != types.StateReady &&
   806  		s.state.State != types.StatePaused &&
   807  		s.state.State != types.StateStopped {
   808  		return fmt.Errorf("Sandbox not ready, paused or stopped, impossible to delete")
   809  	}
   810  
   811  	for _, c := range s.containers {
   812  		if err := c.delete(); err != nil {
   813  			return err
   814  		}
   815  	}
   816  
   817  	if !rootless.IsRootless() {
   818  		if err := s.cgroupsDelete(); err != nil {
   819  			return err
   820  		}
   821  	}
   822  
   823  	globalSandboxList.removeSandbox(s.id)
   824  
   825  	if s.monitor != nil {
   826  		s.monitor.stop()
   827  	}
   828  
   829  	if err := s.hypervisor.cleanup(); err != nil {
   830  		s.Logger().WithError(err).Error("failed to cleanup hypervisor")
   831  	}
   832  
   833  	s.agent.cleanup(s)
   834  	if useOldStore(s.ctx) && s.store != nil {
   835  		if err := s.store.Delete(); err != nil {
   836  			s.Logger().WithError(err).Error("store delete failed")
   837  		}
   838  	}
   839  	return s.newStore.Destroy(s.id)
   840  }
   841  
   842  func (s *Sandbox) startNetworkMonitor() error {
   843  	span, _ := s.trace("startNetworkMonitor")
   844  	defer span.Finish()
   845  
   846  	binPath, err := os.Executable()
   847  	if err != nil {
   848  		return err
   849  	}
   850  
   851  	logLevel := "info"
   852  	if s.config.NetworkConfig.NetmonConfig.Debug {
   853  		logLevel = "debug"
   854  	}
   855  
   856  	params := netmonParams{
   857  		netmonPath: s.config.NetworkConfig.NetmonConfig.Path,
   858  		debug:      s.config.NetworkConfig.NetmonConfig.Debug,
   859  		logLevel:   logLevel,
   860  		runtime:    binPath,
   861  		sandboxID:  s.id,
   862  	}
   863  
   864  	return s.network.Run(s.networkNS.NetNsPath, func() error {
   865  		pid, err := startNetmon(params)
   866  		if err != nil {
   867  			return err
   868  		}
   869  
   870  		s.networkNS.NetmonPID = pid
   871  
   872  		return nil
   873  	})
   874  }
   875  
   876  func (s *Sandbox) createNetwork() error {
   877  	if s.config.NetworkConfig.DisableNewNetNs ||
   878  		s.config.NetworkConfig.NetNSPath == "" {
   879  		return nil
   880  	}
   881  
   882  	span, _ := s.trace("createNetwork")
   883  	defer span.Finish()
   884  
   885  	s.networkNS = NetworkNamespace{
   886  		NetNsPath:    s.config.NetworkConfig.NetNSPath,
   887  		NetNsCreated: s.config.NetworkConfig.NetNsCreated,
   888  	}
   889  
   890  	// In case there is a factory, network interfaces are hotplugged
   891  	// after vm is started.
   892  	if s.factory == nil {
   893  		// Add the network
   894  		endpoints, err := s.network.Add(s.ctx, &s.config.NetworkConfig, s, false)
   895  		if err != nil {
   896  			return err
   897  		}
   898  
   899  		s.networkNS.Endpoints = endpoints
   900  
   901  		if s.config.NetworkConfig.NetmonConfig.Enable {
   902  			if err := s.startNetworkMonitor(); err != nil {
   903  				return err
   904  			}
   905  		}
   906  	}
   907  	return nil
   908  }
   909  
   910  func (s *Sandbox) postCreatedNetwork() error {
   911  
   912  	return s.network.PostAdd(s.ctx, &s.networkNS, s.factory != nil)
   913  }
   914  
   915  func (s *Sandbox) removeNetwork() error {
   916  	span, _ := s.trace("removeNetwork")
   917  	defer span.Finish()
   918  
   919  	if s.config.NetworkConfig.NetmonConfig.Enable {
   920  		if err := stopNetmon(s.networkNS.NetmonPID); err != nil {
   921  			return err
   922  		}
   923  	}
   924  
   925  	return s.network.Remove(s.ctx, &s.networkNS, s.hypervisor)
   926  }
   927  
   928  func (s *Sandbox) generateNetInfo(inf *vcTypes.Interface) (NetworkInfo, error) {
   929  	hw, err := net.ParseMAC(inf.HwAddr)
   930  	if err != nil {
   931  		return NetworkInfo{}, err
   932  	}
   933  
   934  	var addrs []netlink.Addr
   935  	for _, addr := range inf.IPAddresses {
   936  		netlinkAddrStr := fmt.Sprintf("%s/%s", addr.Address, addr.Mask)
   937  		netlinkAddr, err := netlink.ParseAddr(netlinkAddrStr)
   938  		if err != nil {
   939  			return NetworkInfo{}, fmt.Errorf("could not parse %q: %v", netlinkAddrStr, err)
   940  		}
   941  
   942  		addrs = append(addrs, *netlinkAddr)
   943  	}
   944  
   945  	return NetworkInfo{
   946  		Iface: NetlinkIface{
   947  			LinkAttrs: netlink.LinkAttrs{
   948  				Name:         inf.Name,
   949  				HardwareAddr: hw,
   950  				MTU:          int(inf.Mtu),
   951  			},
   952  			Type: inf.LinkType,
   953  		},
   954  		Addrs: addrs,
   955  	}, nil
   956  }
   957  
    958  // AddInterface adds a new NIC to the sandbox.
   959  func (s *Sandbox) AddInterface(inf *vcTypes.Interface) (*vcTypes.Interface, error) {
   960  	netInfo, err := s.generateNetInfo(inf)
   961  	if err != nil {
   962  		return nil, err
   963  	}
   964  
   965  	endpoint, err := createEndpoint(netInfo, len(s.networkNS.Endpoints), s.config.NetworkConfig.InterworkingModel, nil)
   966  	if err != nil {
   967  		return nil, err
   968  	}
   969  
   970  	endpoint.SetProperties(netInfo)
   971  	if err := doNetNS(s.networkNS.NetNsPath, func(_ ns.NetNS) error {
   972  		s.Logger().WithField("endpoint-type", endpoint.Type()).Info("Hot attaching endpoint")
   973  		return endpoint.HotAttach(s.hypervisor)
   974  	}); err != nil {
   975  		return nil, err
   976  	}
   977  
   978  	// Update the sandbox storage
   979  	s.networkNS.Endpoints = append(s.networkNS.Endpoints, endpoint)
   980  	if err := s.Save(); err != nil {
   981  		return nil, err
   982  	}
   983  
   984  	// Add network for vm
   985  	inf.PciPath = endpoint.PciPath()
   986  	return s.agent.updateInterface(inf)
   987  }
   988  
    989  // RemoveInterface removes a NIC from the sandbox.
   990  func (s *Sandbox) RemoveInterface(inf *vcTypes.Interface) (*vcTypes.Interface, error) {
   991  	for i, endpoint := range s.networkNS.Endpoints {
   992  		if endpoint.HardwareAddr() == inf.HwAddr {
   993  			s.Logger().WithField("endpoint-type", endpoint.Type()).Info("Hot detaching endpoint")
   994  			if err := endpoint.HotDetach(s.hypervisor, s.networkNS.NetNsCreated, s.networkNS.NetNsPath); err != nil {
   995  				return inf, err
   996  			}
   997  			s.networkNS.Endpoints = append(s.networkNS.Endpoints[:i], s.networkNS.Endpoints[i+1:]...)
   998  
   999  			if err := s.Save(); err != nil {
  1000  				return inf, err
  1001  			}
  1002  
  1003  			break
  1004  		}
  1005  	}
  1006  	return nil, nil
  1007  }
  1008  
   1009  // ListInterfaces lists all NICs and their configurations in the sandbox.
  1010  func (s *Sandbox) ListInterfaces() ([]*vcTypes.Interface, error) {
  1011  	return s.agent.listInterfaces()
  1012  }
  1013  
  1014  // UpdateRoutes updates the sandbox route table (e.g. for portmapping support).
  1015  func (s *Sandbox) UpdateRoutes(routes []*vcTypes.Route) ([]*vcTypes.Route, error) {
  1016  	return s.agent.updateRoutes(routes)
  1017  }
  1018  
  1019  // ListRoutes lists all routes and their configurations in the sandbox.
  1020  func (s *Sandbox) ListRoutes() ([]*vcTypes.Route, error) {
  1021  	return s.agent.listRoutes()
  1022  }
  1023  
  1024  // startVM starts the VM.
  1025  func (s *Sandbox) startVM() (err error) {
  1026  	span, ctx := s.trace("startVM")
  1027  	defer span.Finish()
  1028  
  1029  	s.Logger().Info("Starting VM")
  1030  
  1031  	if err := s.network.Run(s.networkNS.NetNsPath, func() error {
  1032  		if s.factory != nil {
  1033  			vm, err := s.factory.GetVM(ctx, VMConfig{
  1034  				HypervisorType:   s.config.HypervisorType,
  1035  				HypervisorConfig: s.config.HypervisorConfig,
  1036  				AgentType:        s.config.AgentType,
  1037  				AgentConfig:      s.config.AgentConfig,
  1038  				ProxyType:        s.config.ProxyType,
  1039  				ProxyConfig:      s.config.ProxyConfig,
  1040  			})
  1041  			if err != nil {
  1042  				return err
  1043  			}
  1044  
  1045  			return vm.assignSandbox(s)
  1046  		}
  1047  
  1048  		return s.hypervisor.startSandbox(vmStartTimeout)
  1049  	}); err != nil {
  1050  		return err
  1051  	}
  1052  
  1053  	defer func() {
  1054  		if err != nil {
  1055  			s.hypervisor.stopSandbox()
  1056  		}
  1057  	}()
  1058  
  1059  	// In case of vm factory, network interfaces are hotplugged
  1060  	// after vm is started.
  1061  	if s.factory != nil {
  1062  		endpoints, err := s.network.Add(s.ctx, &s.config.NetworkConfig, s, true)
  1063  		if err != nil {
  1064  			return err
  1065  		}
  1066  
  1067  		s.networkNS.Endpoints = endpoints
  1068  
  1069  		if s.config.NetworkConfig.NetmonConfig.Enable {
  1070  			if err := s.startNetworkMonitor(); err != nil {
  1071  				return err
  1072  			}
  1073  		}
  1074  	}
  1075  
  1076  	s.Logger().Info("VM started")
  1077  
  1078  	// Once the hypervisor is done starting the sandbox,
  1079  	// we want to guarantee that it is manageable.
  1080  	// For that we need to ask the agent to start the
  1081  	// sandbox inside the VM.
  1082  	if err := s.agent.startSandbox(s); err != nil {
  1083  		return err
  1084  	}
  1085  
  1086  	s.Logger().Info("Agent started in the sandbox")
  1087  
  1088  	return nil
  1089  }
  1090  
   1091  // stopVM stops the sandbox's VM.
  1092  func (s *Sandbox) stopVM() error {
  1093  	span, _ := s.trace("stopVM")
  1094  	defer span.Finish()
  1095  
  1096  	s.Logger().Info("Stopping sandbox in the VM")
  1097  	if err := s.agent.stopSandbox(s); err != nil {
  1098  		s.Logger().WithError(err).WithField("sandboxid", s.id).Warning("Agent did not stop sandbox")
  1099  	}
  1100  
  1101  	if s.disableVMShutdown {
  1102  		// Do not kill the VM - allow the agent to shut it down
  1103  		// (only used to support static agent tracing).
  1104  		return nil
  1105  	}
  1106  
  1107  	s.Logger().Info("Stopping VM")
  1108  	return s.hypervisor.stopSandbox()
  1109  }
  1110  
  1111  func (s *Sandbox) addContainer(c *Container) error {
  1112  	if _, ok := s.containers[c.id]; ok {
  1113  		return fmt.Errorf("Duplicated container: %s", c.id)
  1114  	}
  1115  	s.containers[c.id] = c
  1116  
  1117  	return nil
  1118  }
  1119  
   1120  // fetchContainers creates new container structures and
  1121  // adds them to the sandbox. It does not create the containers
  1122  // in the guest. This should only be used when fetching a
  1123  // sandbox that already exists.
  1124  func (s *Sandbox) fetchContainers() error {
  1125  	for i, contConfig := range s.config.Containers {
  1126  		// Add spec from bundle path
  1127  		spec, err := compatoci.GetContainerSpec(contConfig.Annotations)
  1128  		if err != nil {
  1129  			return err
  1130  		}
  1131  		contConfig.CustomSpec = &spec
  1132  		s.config.Containers[i] = contConfig
  1133  
  1134  		c, err := newContainer(s, &s.config.Containers[i])
  1135  		if err != nil {
  1136  			return err
  1137  		}
  1138  
  1139  		if err := s.addContainer(c); err != nil {
  1140  			return err
  1141  		}
  1142  	}
  1143  
  1144  	return nil
  1145  }
  1146  
   1147  // CreateContainer creates a new container in the sandbox.
   1148  // This should be called only when the sandbox is already created.
   1149  // It will add the new container's config to sandbox.config.Containers.
  1150  func (s *Sandbox) CreateContainer(contConfig ContainerConfig) (VCContainer, error) {
  1151  	// Create the container object, add devices to the sandbox's device-manager:
  1152  	c, err := newContainer(s, &contConfig)
  1153  	if err != nil {
  1154  		return nil, err
  1155  	}
  1156  
  1157  	// Update sandbox config to include the new container's config
  1158  	s.config.Containers = append(s.config.Containers, contConfig)
  1159  
  1160  	defer func() {
  1161  		if err != nil {
  1162  			if len(s.config.Containers) > 0 {
  1163  				// delete container config
  1164  				s.config.Containers = s.config.Containers[:len(s.config.Containers)-1]
  1165  				// need to flush change to persist storage
  1166  				if newErr := s.storeSandbox(); newErr != nil {
  1167  					s.Logger().WithError(newErr).Error("Fail to flush s.config.Containers change into sandbox store")
  1168  				}
  1169  			}
  1170  		}
  1171  	}()
  1172  
  1173  	// create and start the container
  1174  	err = c.create()
  1175  	if err != nil {
  1176  		return nil, err
  1177  	}
  1178  
  1179  	// Add the container to the containers list in the sandbox.
  1180  	if err = s.addContainer(c); err != nil {
  1181  		return nil, err
  1182  	}
  1183  
  1184  	defer func() {
  1185  		// Rollback if error happens.
  1186  		if err != nil {
   1187  			logger := s.Logger().WithFields(logrus.Fields{"container-id": c.id, "sandbox-id": s.id, "rollback": true})
  1188  
  1189  			logger.Warning("Cleaning up partially created container")
  1190  
  1191  			if err2 := c.stop(true); err2 != nil {
  1192  				logger.WithError(err2).Warning("Could not delete container")
  1193  			}
  1194  
  1195  			logger.Debug("Removing stopped container from sandbox store")
  1196  
  1197  			s.removeContainer(c.id)
  1198  		}
  1199  	}()
  1200  
   1201  	// The sandbox is responsible for updating the VM resources needed by its containers.
   1202  	// Update resources after having added containers to the sandbox, since
   1203  	// container status is required to know if more resources should be added.
  1204  	err = s.updateResources()
  1205  	if err != nil {
  1206  		return nil, err
  1207  	}
  1208  
  1209  	if err = s.cgroupsUpdate(); err != nil {
  1210  		return nil, err
  1211  	}
  1212  
  1213  	if err = s.storeSandbox(); err != nil {
  1214  		return nil, err
  1215  	}
  1216  
  1217  	return c, nil
  1218  }
  1219  
  1220  // StartContainer starts a container in the sandbox
  1221  func (s *Sandbox) StartContainer(containerID string) (VCContainer, error) {
  1222  	// Fetch the container.
  1223  	c, err := s.findContainer(containerID)
  1224  	if err != nil {
  1225  		return nil, err
  1226  	}
  1227  
  1228  	// Start it.
  1229  	err = c.start()
  1230  	if err != nil {
  1231  		return nil, err
  1232  	}
  1233  
  1234  	if err = s.storeSandbox(); err != nil {
  1235  		return nil, err
  1236  	}
  1237  
  1238  	s.Logger().Info("Container is started")
  1239  
  1240  	// Update sandbox resources in case a stopped container
  1241  	// is started
  1242  	err = s.updateResources()
  1243  	if err != nil {
  1244  		return nil, err
  1245  	}
  1246  
  1247  	return c, nil
  1248  }
  1249  
  1250  // StopContainer stops a container in the sandbox
  1251  func (s *Sandbox) StopContainer(containerID string, force bool) (VCContainer, error) {
  1252  	// Fetch the container.
  1253  	c, err := s.findContainer(containerID)
  1254  	if err != nil {
  1255  		return nil, err
  1256  	}
  1257  
  1258  	// Stop it.
  1259  	if err := c.stop(force); err != nil {
  1260  		return nil, err
  1261  	}
  1262  
  1263  	if err = s.storeSandbox(); err != nil {
  1264  		return nil, err
  1265  	}
  1266  	return c, nil
  1267  }
  1268  
  1269  // KillContainer signals a container in the sandbox
  1270  func (s *Sandbox) KillContainer(containerID string, signal syscall.Signal, all bool) error {
  1271  	// Fetch the container.
  1272  	c, err := s.findContainer(containerID)
  1273  	if err != nil {
  1274  		return err
  1275  	}
  1276  
  1277  	// Send a signal to the process.
  1278  	err = c.kill(signal, all)
  1279  
  1280  	// SIGKILL should never fail otherwise it is
  1281  	// impossible to clean things up.
  1282  	if signal == syscall.SIGKILL {
  1283  		return nil
  1284  	}
  1285  
  1286  	return err
  1287  }
  1288  
  1289  // DeleteContainer deletes a container from the sandbox
  1290  func (s *Sandbox) DeleteContainer(containerID string) (VCContainer, error) {
  1291  	if containerID == "" {
  1292  		return nil, vcTypes.ErrNeedContainerID
  1293  	}
  1294  
  1295  	// Fetch the container.
  1296  	c, err := s.findContainer(containerID)
  1297  	if err != nil {
  1298  		return nil, err
  1299  	}
  1300  
  1301  	// Delete it.
  1302  	err = c.delete()
  1303  	if err != nil {
  1304  		return nil, err
  1305  	}
  1306  
  1307  	// Update sandbox config
  1308  	for idx, contConfig := range s.config.Containers {
  1309  		if contConfig.ID == containerID {
  1310  			s.config.Containers = append(s.config.Containers[:idx], s.config.Containers[idx+1:]...)
  1311  			break
  1312  		}
  1313  	}
  1314  
  1315  	// update the sandbox cgroup
  1316  	if err = s.cgroupsUpdate(); err != nil {
  1317  		return nil, err
  1318  	}
  1319  
  1320  	if err = s.storeSandbox(); err != nil {
  1321  		return nil, err
  1322  	}
  1323  	return c, nil
  1324  }
  1325  
  1326  // ProcessListContainer lists every process running inside a specific
  1327  // container in the sandbox.
  1328  func (s *Sandbox) ProcessListContainer(containerID string, options ProcessListOptions) (ProcessList, error) {
  1329  	// Fetch the container.
  1330  	c, err := s.findContainer(containerID)
  1331  	if err != nil {
  1332  		return nil, err
  1333  	}
  1334  
  1335  	// Get the process list related to the container.
  1336  	return c.processList(options)
  1337  }
  1338  
  1339  // StatusContainer gets the status of a container
  1340  // TODO: update container status properly, see kata-containers/runtime#253
  1341  func (s *Sandbox) StatusContainer(containerID string) (ContainerStatus, error) {
  1342  	if containerID == "" {
  1343  		return ContainerStatus{}, vcTypes.ErrNeedContainerID
  1344  	}
  1345  
  1346  	if c, ok := s.containers[containerID]; ok {
  1347  		rootfs := c.config.RootFs.Source
  1348  		if c.config.RootFs.Mounted {
  1349  			rootfs = c.config.RootFs.Target
  1350  		}
  1351  
  1352  		return ContainerStatus{
  1353  			ID:          c.id,
  1354  			State:       c.state,
  1355  			PID:         c.process.Pid,
  1356  			StartTime:   c.process.StartTime,
  1357  			RootFs:      rootfs,
  1358  			Annotations: c.config.Annotations,
  1359  		}, nil
  1360  	}
  1361  
  1362  	return ContainerStatus{}, vcTypes.ErrNoSuchContainer
  1363  }
  1364  
  1365  // EnterContainer is the virtcontainers container command execution entry point.
  1366  // EnterContainer enters an already running container and runs a given command.
  1367  func (s *Sandbox) EnterContainer(containerID string, cmd types.Cmd) (VCContainer, *Process, error) {
  1368  	// Fetch the container.
  1369  	c, err := s.findContainer(containerID)
  1370  	if err != nil {
  1371  		return nil, nil, err
  1372  	}
  1373  
  1374  	// Enter it.
  1375  	process, err := c.enter(cmd)
  1376  	if err != nil {
  1377  		return nil, nil, err
  1378  	}
  1379  
  1380  	return c, process, nil
  1381  }
  1382  
   1383  // UpdateContainer updates a running container.
  1384  func (s *Sandbox) UpdateContainer(containerID string, resources specs.LinuxResources) error {
  1385  	// Fetch the container.
  1386  	c, err := s.findContainer(containerID)
  1387  	if err != nil {
  1388  		return err
  1389  	}
  1390  
  1391  	err = c.update(resources)
  1392  	if err != nil {
  1393  		return err
  1394  	}
  1395  
  1396  	if err := s.cgroupsUpdate(); err != nil {
  1397  		return err
  1398  	}
  1399  
  1400  	if err = s.storeSandbox(); err != nil {
  1401  		return err
  1402  	}
  1403  	return nil
  1404  }
  1405  
   1406  // StatsContainer returns the stats of a running container
  1407  func (s *Sandbox) StatsContainer(containerID string) (ContainerStats, error) {
  1408  	// Fetch the container.
  1409  	c, err := s.findContainer(containerID)
  1410  	if err != nil {
  1411  		return ContainerStats{}, err
  1412  	}
  1413  
  1414  	stats, err := c.stats()
  1415  	if err != nil {
  1416  		return ContainerStats{}, err
  1417  	}
  1418  	return *stats, nil
  1419  }
  1420  
  1421  // Stats returns the stats of a running sandbox
  1422  func (s *Sandbox) Stats() (SandboxStats, error) {
  1423  	if s.state.CgroupPath == "" {
  1424  		return SandboxStats{}, fmt.Errorf("sandbox cgroup path is empty")
  1425  	}
  1426  
  1427  	var path string
  1428  	var cgroupSubsystems cgroups.Hierarchy
  1429  
  1430  	if s.config.SandboxCgroupOnly {
  1431  		cgroupSubsystems = cgroups.V1
  1432  		path = s.state.CgroupPath
  1433  	} else {
  1434  		cgroupSubsystems = V1NoConstraints
  1435  		path = cgroupNoConstraintsPath(s.state.CgroupPath)
  1436  	}
  1437  
  1438  	cgroup, err := cgroupsLoadFunc(cgroupSubsystems, cgroups.StaticPath(path))
  1439  	if err != nil {
  1440  		return SandboxStats{}, fmt.Errorf("Could not load sandbox cgroup in %v: %v", s.state.CgroupPath, err)
  1441  	}
  1442  
  1443  	metrics, err := cgroup.Stat(cgroups.ErrorHandler(cgroups.IgnoreNotExist))
  1444  	if err != nil {
  1445  		return SandboxStats{}, err
  1446  	}
  1447  
  1448  	stats := SandboxStats{}
  1449  
  1450  	stats.CgroupStats.CPUStats.CPUUsage.TotalUsage = metrics.CPU.Usage.Total
  1451  	stats.CgroupStats.MemoryStats.Usage.Usage = metrics.Memory.Usage.Usage
  1452  	tids, err := s.hypervisor.getThreadIDs()
  1453  	if err != nil {
  1454  		return stats, err
  1455  	}
  1456  	stats.Cpus = len(tids.vcpus)
  1457  
  1458  	return stats, nil
  1459  }
  1460  
  1461  // PauseContainer pauses a running container.
  1462  func (s *Sandbox) PauseContainer(containerID string) error {
  1463  	// Fetch the container.
  1464  	c, err := s.findContainer(containerID)
  1465  	if err != nil {
  1466  		return err
  1467  	}
  1468  
  1469  	// Pause the container.
  1470  	if err := c.pause(); err != nil {
  1471  		return err
  1472  	}
  1473  
  1474  	if err = s.storeSandbox(); err != nil {
  1475  		return err
  1476  	}
  1477  	return nil
  1478  }
  1479  
  1480  // ResumeContainer resumes a paused container.
  1481  func (s *Sandbox) ResumeContainer(containerID string) error {
  1482  	// Fetch the container.
  1483  	c, err := s.findContainer(containerID)
  1484  	if err != nil {
  1485  		return err
  1486  	}
  1487  
  1488  	// Resume the container.
  1489  	if err := c.resume(); err != nil {
  1490  		return err
  1491  	}
  1492  
  1493  	if err = s.storeSandbox(); err != nil {
  1494  		return err
  1495  	}
  1496  	return nil
  1497  }
  1498  
   1499  // createContainers registers all containers with the proxy, creates the
   1500  // containers in the guest and starts one shim per container.
  1501  func (s *Sandbox) createContainers() error {
  1502  	span, _ := s.trace("createContainers")
  1503  	defer span.Finish()
  1504  
  1505  	for _, contConfig := range s.config.Containers {
  1506  
  1507  		c, err := newContainer(s, &contConfig)
  1508  		if err != nil {
  1509  			return err
  1510  		}
  1511  		if err := c.create(); err != nil {
  1512  			return err
  1513  		}
  1514  
  1515  		if err := s.addContainer(c); err != nil {
  1516  			return err
  1517  		}
  1518  	}
  1519  
  1520  	// Update resources after having added containers to the sandbox, since
   1521  	// container status is required to know if more resources should be added.
  1522  	if err := s.updateResources(); err != nil {
  1523  		return err
  1524  	}
  1525  
  1526  	if err := s.cgroupsUpdate(); err != nil {
  1527  		return err
  1528  	}
  1529  	if err := s.storeSandbox(); err != nil {
  1530  		return err
  1531  	}
  1532  
  1533  	return nil
  1534  }
  1535  
   1536  // Start starts a sandbox. The containers making up the sandbox
  1537  // will be started.
  1538  func (s *Sandbox) Start() error {
  1539  	if err := s.state.ValidTransition(s.state.State, types.StateRunning); err != nil {
  1540  		return err
  1541  	}
  1542  
  1543  	prevState := s.state.State
  1544  
  1545  	if err := s.setSandboxState(types.StateRunning); err != nil {
  1546  		return err
  1547  	}
  1548  
  1549  	var startErr error
  1550  	defer func() {
  1551  		if startErr != nil {
  1552  			s.setSandboxState(prevState)
  1553  		}
  1554  	}()
  1555  	for _, c := range s.containers {
  1556  		if startErr = c.start(); startErr != nil {
  1557  			return startErr
  1558  		}
  1559  	}
  1560  
  1561  	if err := s.storeSandbox(); err != nil {
  1562  		return err
  1563  	}
  1564  
  1565  	s.Logger().Info("Sandbox is started")
  1566  
  1567  	return nil
  1568  }
  1569  
   1570  // Stop stops a sandbox. The containers making up the sandbox
  1571  // will be destroyed.
  1572  // When force is true, ignore guest related stop failures.
  1573  func (s *Sandbox) Stop(force bool) error {
  1574  	span, _ := s.trace("stop")
  1575  	defer span.Finish()
  1576  
  1577  	if s.state.State == types.StateStopped {
  1578  		s.Logger().Info("sandbox already stopped")
  1579  		return nil
  1580  	}
  1581  
  1582  	if err := s.state.ValidTransition(s.state.State, types.StateStopped); err != nil {
  1583  		return err
  1584  	}
  1585  
  1586  	for _, c := range s.containers {
  1587  		if err := c.stop(force); err != nil {
  1588  			return err
  1589  		}
  1590  	}
  1591  
  1592  	if err := s.stopVM(); err != nil && !force {
  1593  		return err
  1594  	}
  1595  
  1596  	if err := s.setSandboxState(types.StateStopped); err != nil {
  1597  		return err
  1598  	}
  1599  
  1600  	// Remove the network.
  1601  	if err := s.removeNetwork(); err != nil && !force {
  1602  		return err
  1603  	}
  1604  
  1605  	if err := s.storeSandbox(); err != nil {
  1606  		return err
  1607  	}
  1608  
  1609  	// Stop communicating with the agent.
  1610  	if err := s.agent.disconnect(); err != nil && !force {
  1611  		return err
  1612  	}
  1613  
  1614  	return nil
  1615  }
  1616  
   1617  // list lists all sandboxes running on the host.
  1618  func (s *Sandbox) list() ([]Sandbox, error) {
  1619  	return nil, nil
  1620  }
  1621  
  1622  // enter runs an executable within a sandbox.
  1623  func (s *Sandbox) enter(args []string) error {
  1624  	return nil
  1625  }
  1626  
  1627  // setSandboxState sets both the in-memory and on-disk state of the
  1628  // sandbox.
  1629  func (s *Sandbox) setSandboxState(state types.StateString) error {
  1630  	if state == "" {
  1631  		return vcTypes.ErrNeedState
  1632  	}
  1633  
  1634  	// update in-memory state
  1635  	s.state.State = state
  1636  
  1637  	if useOldStore(s.ctx) {
  1638  		return s.store.Store(store.State, s.state)
  1639  	}
  1640  	return nil
  1641  }
  1642  
  1643  const maxBlockIndex = 65535
  1644  
  1645  // getAndSetSandboxBlockIndex retrieves an unused sandbox block index from
  1646  // the BlockIndexMap and marks it as used. This index is used to maintain the
  1647  // index at which a block device is assigned to a container in the sandbox.
  1648  func (s *Sandbox) getAndSetSandboxBlockIndex() (int, error) {
  1649  	currentIndex := -1
  1650  	for i := 0; i < maxBlockIndex; i++ {
  1651  		if _, ok := s.state.BlockIndexMap[i]; !ok {
  1652  			currentIndex = i
  1653  			break
  1654  		}
  1655  	}
  1656  	if currentIndex == -1 {
  1657  		return -1, errors.New("no available block index")
  1658  	}
  1659  	s.state.BlockIndexMap[currentIndex] = struct{}{}
  1660  
  1661  	return currentIndex, nil
  1662  }
  1663  
  1664  // unsetSandboxBlockIndex deletes the current sandbox block index from BlockIndexMap.
  1665  // This is used to recover from failure while adding a block device.
  1666  func (s *Sandbox) unsetSandboxBlockIndex(index int) error {
  1667  	var err error
  1668  	original := index
  1669  	delete(s.state.BlockIndexMap, index)
  1670  	defer func() {
  1671  		if err != nil {
  1672  			s.state.BlockIndexMap[original] = struct{}{}
  1673  		}
  1674  	}()
  1675  
  1676  	return nil
  1677  }
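
         // Usage sketch (assumption): a caller allocates a block index and releases it
         // again if attaching the device fails; attachBlockDevice is a hypothetical helper.
         //
         //	idx, err := s.getAndSetSandboxBlockIndex()
         //	if err != nil {
         //		return err
         //	}
         //	if err := attachBlockDevice(idx); err != nil {
         //		s.unsetSandboxBlockIndex(idx)
         //		return err
         //	}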
  1678  
   1679  // HotplugAddDevice is used to add a device to the sandbox.
   1680  // Sandbox implements the DeviceReceiver interface from device/api/interface.go
  1681  func (s *Sandbox) HotplugAddDevice(device api.Device, devType config.DeviceType) error {
  1682  	span, _ := s.trace("HotplugAddDevice")
  1683  	defer span.Finish()
  1684  
  1685  	if s.config.SandboxCgroupOnly {
  1686  		// We are about to add a device to the hypervisor, so the
  1687  		// device cgroup MUST be updated: the hypervisor will need
  1688  		// access to this device.
  1689  		hdev := device.GetHostPath()
  1690  		if err := s.cgroupMgr.AddDevice(hdev); err != nil {
  1691  			s.Logger().WithError(err).WithField("device", hdev).
  1692  				Warn("Could not add device to cgroup")
  1693  		}
  1694  	}
  1695  
  1696  	switch devType {
  1697  	case config.DeviceVFIO:
  1698  		vfioDevices, ok := device.GetDeviceInfo().([]*config.VFIODev)
  1699  		if !ok {
  1700  			return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
  1701  		}
  1702  
  1703  		// adding a group of VFIO devices
  1704  		for _, dev := range vfioDevices {
  1705  			if _, err := s.hypervisor.hotplugAddDevice(dev, vfioDev); err != nil {
  1706  				s.Logger().
  1707  					WithFields(logrus.Fields{
  1708  						"sandbox":         s.id,
  1709  						"vfio-device-ID":  dev.ID,
  1710  						"vfio-device-BDF": dev.BDF,
  1711  					}).WithError(err).Error("failed to hotplug VFIO device")
  1712  				return err
  1713  			}
  1714  		}
  1715  		return nil
  1716  	case config.DeviceBlock:
  1717  		blockDevice, ok := device.(*drivers.BlockDevice)
  1718  		if !ok {
  1719  			return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
  1720  		}
  1721  		_, err := s.hypervisor.hotplugAddDevice(blockDevice.BlockDrive, blockDev)
  1722  		return err
  1723  	case config.VhostUserBlk:
  1724  		vhostUserBlkDevice, ok := device.(*drivers.VhostUserBlkDevice)
  1725  		if !ok {
  1726  			return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
  1727  		}
  1728  		_, err := s.hypervisor.hotplugAddDevice(vhostUserBlkDevice.VhostUserDeviceAttrs, vhostuserDev)
  1729  		return err
  1730  	case config.DeviceGeneric:
  1731  		// TODO: what?
  1732  		return nil
  1733  	}
  1734  	return nil
  1735  }
  1736  
  1737  // HotplugRemoveDevice is used to remove a device from the sandbox.
  1738  // Sandbox implements the DeviceReceiver interface from device/api/interface.go
  1739  func (s *Sandbox) HotplugRemoveDevice(device api.Device, devType config.DeviceType) error {
  1740  	defer func() {
  1741  		if s.config.SandboxCgroupOnly {
  1742  			// Remove the device from the cgroup; the hypervisor
  1743  			// should no longer have access to it.
  1744  			hdev := device.GetHostPath()
  1745  			if err := s.cgroupMgr.RemoveDevice(hdev); err != nil {
  1746  				s.Logger().WithError(err).WithField("device", hdev).
  1747  					Warn("Could not remove device from cgroup")
  1748  			}
  1749  		}
  1750  	}()
  1751  
  1752  	switch devType {
  1753  	case config.DeviceVFIO:
  1754  		vfioDevices, ok := device.GetDeviceInfo().([]*config.VFIODev)
  1755  		if !ok {
  1756  			return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
  1757  		}
  1758  
  1759  		// remove a group of VFIO devices
  1760  		for _, dev := range vfioDevices {
  1761  			if _, err := s.hypervisor.hotplugRemoveDevice(dev, vfioDev); err != nil {
  1762  				s.Logger().WithError(err).
  1763  					WithFields(logrus.Fields{
  1764  						"sandbox":         s.id,
  1765  						"vfio-device-ID":  dev.ID,
  1766  						"vfio-device-BDF": dev.BDF,
  1767  					}).Error("failed to hot unplug VFIO device")
  1768  				return err
  1769  			}
  1770  		}
  1771  		return nil
  1772  	case config.DeviceBlock:
  1773  		blockDrive, ok := device.GetDeviceInfo().(*config.BlockDrive)
  1774  		if !ok {
  1775  			return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
  1776  		}
  1777  		_, err := s.hypervisor.hotplugRemoveDevice(blockDrive, blockDev)
  1778  		return err
  1779  	case config.VhostUserBlk:
  1780  		vhostUserDeviceAttrs, ok := device.GetDeviceInfo().(*config.VhostUserDeviceAttrs)
  1781  		if !ok {
  1782  			return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
  1783  		}
  1784  		_, err := s.hypervisor.hotplugRemoveDevice(vhostUserDeviceAttrs, vhostuserDev)
  1785  		return err
  1786  	case config.DeviceGeneric:
  1787  		// TODO: what?
  1788  		return nil
  1789  	}
  1790  	return nil
  1791  }
  1792  
  1793  // GetAndSetSandboxBlockIndex is used for getting and setting virtio-block indexes.
  1794  // Sandbox implements the DeviceReceiver interface from device/api/interface.go
  1795  func (s *Sandbox) GetAndSetSandboxBlockIndex() (int, error) {
  1796  	return s.getAndSetSandboxBlockIndex()
  1797  }
  1798  
  1799  // UnsetSandboxBlockIndex releases a previously allocated block index.
  1800  // Sandbox implements the DeviceReceiver interface from device/api/interface.go
  1801  func (s *Sandbox) UnsetSandboxBlockIndex(index int) error {
  1802  	return s.unsetSandboxBlockIndex(index)
  1803  }
  1804  
  1805  // AppendDevice adds a device to the sandbox via the hypervisor's addDevice call.
  1806  // It currently handles vhost-user and VFIO devices only.
  1807  // Sandbox implements the DeviceReceiver interface from device/api/interface.go
  1808  func (s *Sandbox) AppendDevice(device api.Device) error {
  1809  	switch device.DeviceType() {
  1810  	case config.VhostUserSCSI, config.VhostUserNet, config.VhostUserBlk, config.VhostUserFS:
  1811  		return s.hypervisor.addDevice(device.GetDeviceInfo().(*config.VhostUserDeviceAttrs), vhostuserDev)
  1812  	case config.DeviceVFIO:
  1813  		vfioDevs := device.GetDeviceInfo().([]*config.VFIODev)
  1814  		// Append every device in the VFIO group, not just the first one.
  1815  		for _, d := range vfioDevs {
  1816  			if err := s.hypervisor.addDevice(*d, vfioDev); err != nil {
        				return err
        			}
        		}
        		return nil
  1817  	default:
  1818  		s.Logger().WithField("device-type", device.DeviceType()).
  1819  			Warn("Could not append device: unsupported device type")
  1820  	}
  1821  
  1822  	return fmt.Errorf("unsupported device type")
  1823  }
  1824  
  1825  // AddDevice will add a device to the sandbox
  1826  func (s *Sandbox) AddDevice(info config.DeviceInfo) (api.Device, error) {
  1827  	if s.devManager == nil {
  1828  		return nil, fmt.Errorf("device manager isn't initialized")
  1829  	}
  1830  
  1831  	var err error
  1832  	b, err := s.devManager.NewDevice(info)
  1833  	if err != nil {
  1834  		return nil, err
  1835  	}
  1836  	defer func() {
  1837  		if err != nil {
  1838  			s.devManager.RemoveDevice(b.DeviceID())
  1839  		}
  1840  	}()
  1841  
  1842  	if err = s.devManager.AttachDevice(b.DeviceID(), s); err != nil {
  1843  		return nil, err
  1844  	}
  1845  	defer func() {
  1846  		if err != nil {
  1847  			s.devManager.DetachDevice(b.DeviceID(), s)
  1848  		}
  1849  	}()
  1850  
  1851  	return b, nil
  1852  }
  1853  
  1854  // updateResources will:
  1855  // - calculate the resources required for the virtual machine, and adjust the virtual machine
  1856  // sizing accordingly. For a given sandbox, it will calculate the number of vCPUs required based
  1857  // on the sum of container requests, plus default CPUs for the VM. The same is done for memory.
  1858  // If changes in memory or CPU are made, the VM will be updated and the agent will online the
  1859  // applicable CPU and memory.
  1860  func (s *Sandbox) updateResources() error {
  1861  	if s == nil {
  1862  		return errors.New("sandbox is nil")
  1863  	}
  1864  
  1865  	if s.config == nil {
  1866  		return fmt.Errorf("sandbox config is nil")
  1867  	}
  1868  
  1869  	sandboxVCPUs, err := s.calculateSandboxCPUs()
  1870  	if err != nil {
  1871  		return err
  1872  	}
  1873  	// Add default vcpus for sandbox
  1874  	sandboxVCPUs += s.hypervisor.hypervisorConfig().NumVCPUs
  1875  
  1876  	sandboxMemoryByte := s.calculateSandboxMemory()
  1877  
  1878  	// Add default / rsvd memory for sandbox.
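        	// The hypervisor MemorySize is expressed in MiB; shifting it left by
        	// utils.MibToBytesShift (20) converts it to bytes to match the
        	// per-container byte total above.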
  1879  	sandboxMemoryByte += uint64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift
  1880  
  1881  	// Update VCPUs
  1882  	s.Logger().WithField("cpus-sandbox", sandboxVCPUs).Debugf("Request to hypervisor to update vCPUs")
  1883  	oldCPUs, newCPUs, err := s.hypervisor.resizeVCPUs(sandboxVCPUs)
  1884  	if err != nil {
  1885  		return err
  1886  	}
  1887  
  1888  	// If the CPUs were increased, ask agent to online them
  1889  	if oldCPUs < newCPUs {
  1890  		vcpusAdded := newCPUs - oldCPUs
  1891  		if err := s.agent.onlineCPUMem(vcpusAdded, true); err != nil {
  1892  			return err
  1893  		}
  1894  	}
  1895  	s.Logger().Debugf("Sandbox CPUs: %d", newCPUs)
  1896  
  1897  	// Update Memory
  1898  	s.Logger().WithField("memory-sandbox-size-byte", sandboxMemoryByte).Debugf("Request to hypervisor to update memory")
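        	// Convert the byte total back to MiB, the unit expected by the
        	// hypervisor's resizeMemory call below.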
  1899  	newMemoryMB := uint32(sandboxMemoryByte >> utils.MibToBytesShift)
  1900  
  1901  	newMemory, updatedMemoryDevice, err := s.hypervisor.resizeMemory(newMemoryMB, s.state.GuestMemoryBlockSizeMB, s.state.GuestMemoryHotplugProbe)
  1902  	if err != nil {
  1903  		return err
  1904  	}
  1905  	s.Logger().Debugf("Sandbox memory size: %d MB", newMemory)
  1906  	if s.state.GuestMemoryHotplugProbe && updatedMemoryDevice.addr != 0 {
  1907  		// Notify the guest kernel about the memory hot-add event before onlining it
  1908  		s.Logger().Debugf("notify guest kernel memory hot-add event via probe interface, memory device located at 0x%x", updatedMemoryDevice.addr)
  1909  		if err := s.agent.memHotplugByProbe(updatedMemoryDevice.addr, uint32(updatedMemoryDevice.sizeMB), s.state.GuestMemoryBlockSizeMB); err != nil {
  1910  			return err
  1911  		}
  1912  	}
  1913  	if err := s.agent.onlineCPUMem(0, false); err != nil {
  1914  		return err
  1915  	}
  1916  	return nil
  1917  }
  1918  
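        // calculateSandboxMemory sums, in bytes, the memory limits and hugepage
        // limits of every container in the sandbox that is not already stopped.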
  1919  func (s *Sandbox) calculateSandboxMemory() uint64 {
  1920  	memorySandbox := uint64(0)
  1921  	for _, c := range s.config.Containers {
  1922  		// Do not hot add resources of non-running containers again
  1923  		if cont, ok := s.containers[c.ID]; ok && cont.state.State == types.StateStopped {
  1924  			s.Logger().WithField("container-id", c.ID).Debug("Not taking into account memory resources of stopped container")
  1925  			continue
  1926  		}
  1927  
  1928  		if m := c.Resources.Memory; m != nil && m.Limit != nil && *m.Limit > 0 {
  1929  			memorySandbox += uint64(*m.Limit)
  1930  			s.Logger().WithField("sandbox-memory", memorySandbox).Info("Added container memory limit to sandbox memory")
  1931  		}
  1932  
  1933  		// Add hugepages memory
  1934  		// HugepageLimit is uint64 - https://github.com/opencontainers/runtime-spec/blob/master/specs-go/config.go#L242
  1935  		for _, l := range c.Resources.HugepageLimits {
  1936  			memorySandbox += uint64(l.Limit)
  1937  		}
  1938  	}
  1939  
  1940  	return memorySandbox
  1941  }
  1942  
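        // calculateSandboxCPUs returns the number of vCPUs needed to back the CPU
        // requests of all running containers: each quota/period pair is converted to
        // milli-CPUs and, if no container sets a quota, the total cpuset size is used
        // instead.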
  1943  func (s *Sandbox) calculateSandboxCPUs() (uint32, error) {
  1944  	mCPU := uint32(0)
  1945  	cpusetCount := int(0)
  1946  
  1947  	for _, c := range s.config.Containers {
  1948  		// Do not hot add resources of non-running containers again
  1949  		if cont, ok := s.containers[c.ID]; ok && cont.state.State == types.StateStopped {
  1950  			s.Logger().WithField("container-id", c.ID).Debug("Not taking into account CPU resources of stopped container")
  1951  			continue
  1952  		}
  1953  
  1954  		if cpu := c.Resources.CPU; cpu != nil {
  1955  			if cpu.Period != nil && cpu.Quota != nil {
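        				// quota/period expresses CPU bandwidth, e.g. quota=50000 with
        				// period=100000 is half a CPU, i.e. 500 milli-CPUs.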
  1956  				mCPU += utils.CalculateMilliCPUs(*cpu.Quota, *cpu.Period)
  1957  			}
  1958  
  1959  			set, err := cpuset.Parse(cpu.Cpus)
  1960  			if err != nil {
  1961  				return 0, err
  1962  			}
  1963  			cpusetCount += set.Size()
  1964  		}
  1965  	}
  1966  
  1967  	// If we aren't being constrained, then we could have two scenarios:
  1968  	//  1. BestEffort QoS: no proper support today in Kata.
  1969  	//  2. We could be constrained only by CPUSets. Check for this:
  1970  	if mCPU == 0 && cpusetCount > 0 {
  1971  		return uint32(cpusetCount), nil
  1972  	}
  1973  
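        	// Convert the aggregate milli-CPU value into whole vCPUs for the VM.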
  1974  	return utils.CalculateVCpusFromMilliCpus(mCPU), nil
  1975  }
  1976  
  1977  // GetHypervisorType returns the name of the hypervisor currently in use.
  1978  // Sandbox implements the DeviceReceiver interface from device/api/interface.go
  1979  func (s *Sandbox) GetHypervisorType() string {
  1980  	return string(s.config.HypervisorType)
  1981  }
  1982  
  1983  // cgroupsUpdate will:
  1984  //  1) get the v1constraints cgroup associated with the stored cgroup path
  1985  //  2) (re-)add hypervisor vCPU threads to the appropriate cgroup
  1986  //  3) If we are managing sandbox cgroup, update the v1constraints cgroup size
  1987  func (s *Sandbox) cgroupsUpdate() error {
  1988  
  1989  	// If Kata is configured for SandboxCgroupOnly, the VMM and its processes are already
  1990  	// in the Kata sandbox cgroup (inherited). Check to see if sandbox cpuset needs to be
  1991  	// updated.
  1992  	if s.config.SandboxCgroupOnly {
  1993  		cpuset, memset, err := s.getSandboxCPUSet()
  1994  		if err != nil {
  1995  			return err
  1996  		}
  1997  
  1998  		if err := s.cgroupMgr.SetCPUSet(cpuset, memset); err != nil {
  1999  			return err
  2000  		}
  2001  
  2002  		return nil
  2003  	}
  2004  
  2005  	if s.state.CgroupPath == "" {
  2006  		s.Logger().Warn("sandbox's cgroup won't be updated: cgroup path is empty")
  2007  		return nil
  2008  	}
  2009  
  2010  	cgroup, err := cgroupsLoadFunc(V1Constraints, cgroups.StaticPath(s.state.CgroupPath))
  2011  	if err != nil {
  2012  		return fmt.Errorf("Could not load cgroup %v: %v", s.state.CgroupPath, err)
  2013  	}
  2014  
  2015  	if err := s.constrainHypervisor(cgroup); err != nil {
  2016  		return err
  2017  	}
  2018  
  2019  	if len(s.containers) <= 1 {
  2020  		// nothing to update
  2021  		return nil
  2022  	}
  2023  
  2024  	resources, err := s.resources()
  2025  	if err != nil {
  2026  		return err
  2027  	}
  2028  
  2029  	if err := cgroup.Update(&resources); err != nil {
  2030  		return fmt.Errorf("Could not update sandbox cgroup path='%v' error='%v'", s.state.CgroupPath, err)
  2031  	}
  2032  
  2033  	return nil
  2034  }
  2035  
  2036  // cgroupsDelete will move the running processes in the sandbox cgroup
  2037  // to the parent and then delete the sandbox cgroup
  2038  func (s *Sandbox) cgroupsDelete() error {
  2039  	s.Logger().Debug("Deleting sandbox cgroup")
  2040  	if s.state.CgroupPath == "" {
  2041  		s.Logger().Warnf("sandbox cgroups path is empty")
  2042  		return nil
  2043  	}
  2044  
  2045  	var path string
  2046  	var cgroupSubsystems cgroups.Hierarchy
  2047  
  2048  	if s.config.SandboxCgroupOnly {
  2049  		return s.cgroupMgr.Destroy()
  2050  	}
  2051  
  2052  	cgroupSubsystems = V1NoConstraints
  2053  	path = cgroupNoConstraintsPath(s.state.CgroupPath)
  2054  	s.Logger().WithField("path", path).Debug("Deleting no constraints cgroup")
  2055  
  2056  	sandboxCgroups, err := cgroupsLoadFunc(cgroupSubsystems, cgroups.StaticPath(path))
  2057  	if err == cgroups.ErrCgroupDeleted {
  2058  		// cgroup already deleted
  2059  		s.Logger().Warnf("cgroup already deleted: '%s'", err)
  2060  		return nil
  2061  	}
  2062  
  2063  	if err != nil {
  2064  		return fmt.Errorf("Could not load cgroups %v: %v", path, err)
  2065  	}
  2066  
  2067  	// Move running processes to the parent cgroup so this cgroup can be removed.
  2068  	parent, err := parentCgroup(cgroupSubsystems, path)
  2069  	if err != nil {
  2070  		// The parent cgroup doesn't exist, which means no processes are running
  2071  		// and the no-constraints cgroup was already removed.
  2072  		s.Logger().WithError(err).Warn("Parent cgroup doesn't exist")
  2073  		return nil
  2074  	}
  2075  
  2076  	if err := sandboxCgroups.MoveTo(parent); err != nil {
  2077  		// Don't fail, cgroup can be deleted
  2078  		s.Logger().WithError(err).Warnf("Could not move process from %s to parent cgroup", path)
  2079  	}
  2080  
  2081  	return sandboxCgroups.Delete()
  2082  }
  2083  
  2084  // constrainHypervisor will place the VMM and vCPU threads into cgroups.
  2085  func (s *Sandbox) constrainHypervisor(cgroup cgroups.Cgroup) error {
  2086  	// VMM threads are only placed into the constrained cgroup if SandboxCgroupOnly is being set.
  2087  	// This is the "correct" behavior, but if the parent cgroup isn't set up correctly to take
  2088  	// Kata/VMM into account, Kata may fail to boot due to being overconstrained.
  2089  	// If !SandboxCgroupOnly, place the VMM into an unconstrained cgroup, and the vCPU threads into constrained
  2090  	// cgroup
  2091  	if s.config.SandboxCgroupOnly {
  2092  		// Kata components were moved into the sandbox-cgroup already, so VMM
  2093  		// will already land there as well. No need to take action
  2094  		return nil
  2095  	}
  2096  
  2097  	pids := s.hypervisor.getPids()
  2098  	if len(pids) == 0 || pids[0] == 0 {
  2099  		return fmt.Errorf("Invalid hypervisor PID: %+v", pids)
  2100  	}
  2101  
  2107  	// Move the VMM into the no-constraints cgroup: constrained cgroups for the VMM itself are not yet supported.
  2108  	resources := &specs.LinuxResources{}
  2109  	path := cgroupNoConstraintsPath(s.state.CgroupPath)
  2110  	vmmCgroup, err := cgroupsNewFunc(V1NoConstraints, cgroups.StaticPath(path), resources)
  2111  	if err != nil {
  2112  		return fmt.Errorf("Could not create cgroup %v: %v", path, err)
  2113  	}
  2114  
  2115  	for _, pid := range pids {
  2116  		if pid <= 0 {
  2117  			s.Logger().Warnf("Invalid hypervisor pid: %d", pid)
  2118  			continue
  2119  		}
  2120  
  2121  		if err := vmmCgroup.Add(cgroups.Process{Pid: pid}); err != nil {
  2122  			return fmt.Errorf("Could not add hypervisor PID %d to cgroup: %v", pid, err)
  2123  		}
  2124  	}
  2125  
  2126  	// When a new container joins, new CPUs could be hotplugged, so we
  2127  	// have to query fresh vCPU info from the hypervisor every time.
  2128  	tids, err := s.hypervisor.getThreadIDs()
  2129  	if err != nil {
  2130  		return fmt.Errorf("failed to get thread ids from hypervisor: %v", err)
  2131  	}
  2132  	if len(tids.vcpus) == 0 {
  2133  		// If there's no tid returned from the hypervisor, this is not
  2134  		// a bug. It simply means there is nothing to constrain, hence
  2135  		// let's return without any error from here.
  2136  		return nil
  2137  	}
  2138  
  2139  	// Move vcpus (threads) into cgroups with constraints.
  2140  	// Moving the whole hypervisor process would be easier, but IO/network
  2141  	// performance would then be over-constrained.
  2142  	for _, i := range tids.vcpus {
  2143  		// Unlike Add (which writes to cgroup.procs), AddTask writes the thread id to `tasks`.
  2144  		// After this, the vCPU threads are constrained while the other hypervisor
  2145  		// threads are left in the parent cgroup untouched.
  2146  		if err := cgroup.AddTask(cgroups.Process{
  2147  			Pid: i,
  2148  		}); err != nil {
  2149  			return err
  2150  		}
  2151  	}
  2152  
  2153  	return nil
  2154  }
  2155  
  2156  func (s *Sandbox) resources() (specs.LinuxResources, error) {
  2157  	resources := specs.LinuxResources{
  2158  		CPU: s.cpuResources(),
  2159  	}
  2160  
  2161  	return resources, nil
  2162  }
  2163  
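        // cpuResources aggregates the CPU constraints of the non-sandbox containers
        // into a single specs.LinuxCPU: shares and period take the maximum across
        // containers, quota and the realtime values are summed, and the cpus/mems
        // lists are concatenated before being sanitized by validCPUResources.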
  2164  func (s *Sandbox) cpuResources() *specs.LinuxCPU {
  2165  	// Use default period and quota if they are not specified.
  2166  	// Container will inherit the constraints from its parent.
  2167  	quota := int64(0)
  2168  	period := uint64(0)
  2169  	shares := uint64(0)
  2170  	realtimePeriod := uint64(0)
  2171  	realtimeRuntime := int64(0)
  2172  
  2173  	cpu := &specs.LinuxCPU{
  2174  		Quota:           &quota,
  2175  		Period:          &period,
  2176  		Shares:          &shares,
  2177  		RealtimePeriod:  &realtimePeriod,
  2178  		RealtimeRuntime: &realtimeRuntime,
  2179  	}
  2180  
  2181  	for _, c := range s.containers {
  2182  		ann := c.GetAnnotations()
  2183  		if ann[annotations.ContainerTypeKey] == string(PodSandbox) {
  2184  			// skip sandbox container
  2185  			continue
  2186  		}
  2187  
  2188  		if c.config.Resources.CPU == nil {
  2189  			continue
  2190  		}
  2191  
  2192  		if c.config.Resources.CPU.Shares != nil {
  2193  			shares = uint64(math.Max(float64(*c.config.Resources.CPU.Shares), float64(shares)))
  2194  		}
  2195  
  2196  		if c.config.Resources.CPU.Quota != nil {
  2197  			quota += *c.config.Resources.CPU.Quota
  2198  		}
  2199  
  2200  		if c.config.Resources.CPU.Period != nil {
  2201  			period = uint64(math.Max(float64(*c.config.Resources.CPU.Period), float64(period)))
  2202  		}
  2203  
  2204  		if c.config.Resources.CPU.Cpus != "" {
  2205  			cpu.Cpus += c.config.Resources.CPU.Cpus + ","
  2206  		}
  2207  
  2208  		if c.config.Resources.CPU.RealtimeRuntime != nil {
  2209  			realtimeRuntime += *c.config.Resources.CPU.RealtimeRuntime
  2210  		}
  2211  
  2212  		if c.config.Resources.CPU.RealtimePeriod != nil {
  2213  			realtimePeriod += *c.config.Resources.CPU.RealtimePeriod
  2214  		}
  2215  
  2216  		if c.config.Resources.CPU.Mems != "" {
  2217  			cpu.Mems += c.config.Resources.CPU.Mems + ","
  2218  		}
  2219  	}
  2220  
  2221  	cpu.Cpus = strings.Trim(cpu.Cpus, " \n\t,")
        	// Mems is built the same way, so trim its trailing separator as well.
        	cpu.Mems = strings.Trim(cpu.Mems, " \n\t,")
  2222  
  2223  	return validCPUResources(cpu)
  2224  }
  2225  
  2226  // setupSandboxCgroup creates and joins sandbox cgroups for the sandbox config
  2227  func (s *Sandbox) setupSandboxCgroup() error {
  2228  	var err error
  2229  	spec := s.GetPatchedOCISpec()
  2230  	if spec == nil {
  2231  		return errorMissingOCISpec
  2232  	}
  2233  
  2234  	if spec.Linux == nil {
  2235  		s.Logger().WithField("sandboxid", s.id).Warning("no cgroup path provided for pod sandbox, not creating sandbox cgroup")
  2236  		return nil
  2237  	}
  2238  
  2239  	s.state.CgroupPath, err = vccgroups.ValidCgroupPath(spec.Linux.CgroupsPath, s.config.SystemdCgroup)
  2240  	if err != nil {
  2241  		return fmt.Errorf("Invalid cgroup path: %v", err)
  2242  	}
  2243  
  2244  	runtimePid := os.Getpid()
  2245  	// Add the runtime to the Kata sandbox cgroup
  2246  	if err = s.cgroupMgr.Add(runtimePid); err != nil {
  2247  		return fmt.Errorf("Could not add runtime PID %d to sandbox cgroup:  %v", runtimePid, err)
  2248  	}
  2249  
  2250  	// `Apply` updates the manager's Cgroups and CgroupPaths;
  2251  	// both need to be saved since they are used to create
  2252  	// or restore a cgroup manager.
  2253  	if s.config.Cgroups, err = s.cgroupMgr.GetCgroups(); err != nil {
  2254  		return fmt.Errorf("Could not get cgroup configuration:  %v", err)
  2255  	}
  2256  
  2257  	s.state.CgroupPaths = s.cgroupMgr.GetPaths()
  2258  
  2259  	if err = s.cgroupMgr.Apply(); err != nil {
  2260  		return fmt.Errorf("Could not constrain cgroup: %v", err)
  2261  	}
  2262  
  2263  	return nil
  2264  }
  2265  
  2266  // GetPatchedOCISpec returns sandbox's OCI specification
  2267  // This OCI specification was patched when the sandbox was created
  2268  // by containerCapabilities(), SetEphemeralStorageType() and others
  2269  // in order to support:
  2270  // * capabilities
  2271  // * Ephemeral storage
  2272  // * k8s empty dir
  2273  // If you need the original (vanilla) OCI spec,
  2274  // use compatoci.GetContainerSpec() instead.
  2275  func (s *Sandbox) GetPatchedOCISpec() *specs.Spec {
  2276  	if s.config == nil {
  2277  		return nil
  2278  	}
  2279  
  2280  	// get the container associated with the PodSandbox annotation. In Kubernetes, this
  2281  	// represents the pause container. In Docker, this is the container. We derive the
  2282  	// cgroup path from this container.
  2283  	for _, cConfig := range s.config.Containers {
  2284  		if cConfig.Annotations[annotations.ContainerTypeKey] == string(PodSandbox) {
  2285  			return cConfig.CustomSpec
  2286  		}
  2287  	}
  2288  
  2289  	return nil
  2290  }
  2291  
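        // GetOOMEvent retrieves an OOM event for this sandbox from the agent.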
  2292  func (s *Sandbox) GetOOMEvent() (string, error) {
  2293  	return s.agent.getOOMEvent()
  2294  }
  2295  
  2296  // getSandboxCPUSet returns the union of the cpus and mems of all the sandbox's
  2297  // containers' CPU sets, as strings in canonical Linux cpuset list format.
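        // For example, containers pinned to cpus "0-1" and "1,3" produce the union "0-1,3".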
  2298  func (s *Sandbox) getSandboxCPUSet() (string, string, error) {
  2299  	if s.config == nil {
  2300  		return "", "", nil
  2301  	}
  2302  
  2303  	cpuResult := cpuset.NewCPUSet()
  2304  	memResult := cpuset.NewCPUSet()
  2305  	for _, ctr := range s.config.Containers {
  2306  		if ctr.Resources.CPU != nil {
  2307  			currCPUSet, err := cpuset.Parse(ctr.Resources.CPU.Cpus)
  2308  			if err != nil {
  2309  				return "", "", fmt.Errorf("unable to parse CPUset.cpus for container %s: %v", ctr.ID, err)
  2310  			}
  2311  			cpuResult = cpuResult.Union(currCPUSet)
  2312  
  2313  			currMemSet, err := cpuset.Parse(ctr.Resources.CPU.Mems)
  2314  			if err != nil {
  2315  				return "", "", fmt.Errorf("unable to parse CPUset.mems for container %s: %v", ctr.ID, err)
  2316  			}
  2317  			memResult = memResult.Union(currMemSet)
  2318  		}
  2319  	}
  2320  
  2321  	return cpuResult.String(), memResult.String(), nil
  2322  }
  2323  
  2324  // GetSandboxBlockOffset returns an offset w.r.t. the sandbox block index, to be
  2325  // used when determining a virtio-block drive name. An offset may be present if
  2326  // specific drive names are reserved, e.g. for a sandbox rootfs, but not
  2327  // included in the BlockIndexMap.
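        // For example, a hypervisor that reserves the first drive name for the sandbox
        // rootfs would report an offset of 1, so block index 0 maps to the second
        // drive name.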
  2328  func (s *Sandbox) GetSandboxBlockOffset() int {
  2329  	return s.hypervisor.getVirtDriveOffset()
  2330  }