gitee.com/leisunstar/runtime@v0.0.0-20200521203717-5cef3e7b53f9/virtcontainers/sandbox.go (about)

     1  // Copyright (c) 2016 Intel Corporation
     2  //
     3  // SPDX-License-Identifier: Apache-2.0
     4  //
     5  
     6  package virtcontainers
     7  
     8  import (
     9  	"context"
    10  	"fmt"
    11  	"io"
    12  	"math"
    13  	"net"
    14  	"os"
    15  	"strings"
    16  	"sync"
    17  	"syscall"
    18  
    19  	"github.com/containerd/cgroups"
    20  	"github.com/containernetworking/plugins/pkg/ns"
    21  	"github.com/opencontainers/runc/libcontainer/configs"
    22  	specs "github.com/opencontainers/runtime-spec/specs-go"
    23  	opentracing "github.com/opentracing/opentracing-go"
    24  	"github.com/pkg/errors"
    25  	"github.com/sirupsen/logrus"
    26  	"github.com/vishvananda/netlink"
    27  
    28  	"github.com/kata-containers/agent/protocols/grpc"
    29  	"github.com/kata-containers/runtime/virtcontainers/device/api"
    30  	"github.com/kata-containers/runtime/virtcontainers/device/config"
    31  	"github.com/kata-containers/runtime/virtcontainers/device/drivers"
    32  	deviceManager "github.com/kata-containers/runtime/virtcontainers/device/manager"
    33  	exp "github.com/kata-containers/runtime/virtcontainers/experimental"
    34  	"github.com/kata-containers/runtime/virtcontainers/persist"
    35  	persistapi "github.com/kata-containers/runtime/virtcontainers/persist/api"
    36  	"github.com/kata-containers/runtime/virtcontainers/pkg/annotations"
    37  	vccgroups "github.com/kata-containers/runtime/virtcontainers/pkg/cgroups"
    38  	"github.com/kata-containers/runtime/virtcontainers/pkg/compatoci"
    39  	"github.com/kata-containers/runtime/virtcontainers/pkg/rootless"
    40  	vcTypes "github.com/kata-containers/runtime/virtcontainers/pkg/types"
    41  	"github.com/kata-containers/runtime/virtcontainers/store"
    42  	"github.com/kata-containers/runtime/virtcontainers/types"
    43  	"github.com/kata-containers/runtime/virtcontainers/utils"
    44  )
    45  
const (
	// vmStartTimeout is the time, in seconds, a sandbox waits for the VM to
	// start before considering the start operation failed.
	vmStartTimeout = 10

	// DirMode is the permission bits used for creating a directory
	// (0750 combined with the os.ModeDir flag).
	DirMode = os.FileMode(0750) | os.ModeDir
)
    54  
// SandboxStatus describes a sandbox status.
// It is the value assembled and returned by Sandbox.Status.
type SandboxStatus struct {
	ID               string
	State            types.SandboxState
	Hypervisor       HypervisorType
	HypervisorConfig HypervisorConfig
	Agent            AgentType
	// ContainersStatus holds one entry per container of the sandbox.
	ContainersStatus []ContainerStatus

	// Annotations allow clients to store arbitrary values,
	// for example to add additional status values required
	// to support particular specifications.
	Annotations map[string]string
}
    69  
// SandboxStats describes a sandbox's stats: its cgroup statistics and a
// CPU count.
type SandboxStats struct {
	CgroupStats CgroupStats
	Cpus        int
}
    75  
// SandboxConfig is a Sandbox configuration.
type SandboxConfig struct {
	// ID identifies the sandbox; valid() rejects an empty ID.
	ID string

	Hostname string

	HypervisorType   HypervisorType
	HypervisorConfig HypervisorConfig

	AgentType   AgentType
	AgentConfig interface{}

	ProxyType   ProxyType
	ProxyConfig ProxyConfig

	ShimType   ShimType
	ShimConfig interface{}

	NetworkConfig NetworkConfig

	// Volumes is a list of shared volumes between the host and the Sandbox.
	Volumes []types.Volume

	// Containers describe the list of containers within a Sandbox.
	// This list can be empty and populated by adding containers
	// to the Sandbox a posteriori.
	// TODO: this should be a map to avoid duplicated containers
	Containers []ContainerConfig

	// Annotations keys must be unique strings and must be name-spaced
	// with e.g. reverse domain notation (org.clearlinux.key).
	Annotations map[string]string

	ShmSize uint64

	// SharePidNs sets all containers to share the same sandbox level pid namespace.
	SharePidNs bool

	// Stateful keeps sandbox resources in memory across APIs. Users will be responsible
	// for calling Release() to release the memory resources.
	Stateful bool

	// SystemdCgroup enables systemd cgroup support
	SystemdCgroup bool

	// SandboxCgroupOnly enables cgroup only at podlevel in the host
	SandboxCgroupOnly bool

	DisableGuestSeccomp bool

	// Experimental features enabled
	Experimental []exp.Feature

	// Cgroups specifies specific cgroup settings for the various subsystems that the container is
	// placed into to limit the resources the container has available
	Cgroups *configs.Cgroup
}
   133  
   134  func (s *Sandbox) trace(name string) (opentracing.Span, context.Context) {
   135  	if s.ctx == nil {
   136  		s.Logger().WithField("type", "bug").Error("trace called before context set")
   137  		s.ctx = context.Background()
   138  	}
   139  
   140  	span, ctx := opentracing.StartSpanFromContext(s.ctx, name)
   141  
   142  	span.SetTag("subsystem", "sandbox")
   143  
   144  	return span, ctx
   145  }
   146  
   147  func (s *Sandbox) startProxy() error {
   148  
   149  	// If the proxy is KataBuiltInProxyType type, it needs to restart the proxy
   150  	// to watch the guest console if it hadn't been watched.
   151  	if s.agent == nil {
   152  		return fmt.Errorf("sandbox %s missed agent pointer", s.ID())
   153  	}
   154  
   155  	return s.agent.startProxy(s)
   156  }
   157  
   158  // valid checks that the sandbox configuration is valid.
   159  func (sandboxConfig *SandboxConfig) valid() bool {
   160  	if sandboxConfig.ID == "" {
   161  		return false
   162  	}
   163  
   164  	if _, err := newHypervisor(sandboxConfig.HypervisorType); err != nil {
   165  		sandboxConfig.HypervisorType = QemuHypervisor
   166  	}
   167  
   168  	// validate experimental features
   169  	for _, f := range sandboxConfig.Experimental {
   170  		if exp.Get(f.Name) == nil {
   171  			return false
   172  		}
   173  	}
   174  	return true
   175  }
   176  
// Sandbox is composed of a set of containers and a runtime environment.
// A Sandbox can be created, deleted, started, paused, stopped, listed, entered, and restored.
type Sandbox struct {
	id string

	// Mutex is taken by Monitor while it lazily creates the monitor.
	sync.Mutex
	factory    Factory
	hypervisor hypervisor
	agent      agent
	store      *store.VCStore
	// newStore is used to replace VCStore step by step
	newStore persistapi.PersistDriver

	network Network
	monitor *monitor

	config *SandboxConfig

	devManager api.DeviceManager

	volumes []types.Volume

	// containers maps a container ID to its Container.
	containers map[string]*Container

	state types.SandboxState

	networkNS NetworkNamespace

	// annotationsLock guards accesses to config.Annotations made through
	// Annotations, SetAnnotations and GetAnnotations.
	annotationsLock *sync.RWMutex

	wg *sync.WaitGroup

	shmSize           uint64
	sharePidNs        bool
	stateful          bool
	seccompSupported  bool
	disableVMShutdown bool

	cgroupMgr *vccgroups.Manager

	// ctx is the context used for tracing and network operations.
	ctx context.Context
}
   219  
// ID returns the sandbox identifier string.
func (s *Sandbox) ID() string {
	return s.id
}
   224  
   225  // Logger returns a logrus logger appropriate for logging Sandbox messages
   226  func (s *Sandbox) Logger() *logrus.Entry {
   227  	return virtLog.WithFields(logrus.Fields{
   228  		"subsystem": "sandbox",
   229  		"sandbox":   s.id,
   230  	})
   231  }
   232  
   233  // Annotations returns any annotation that a user could have stored through the sandbox.
   234  func (s *Sandbox) Annotations(key string) (string, error) {
   235  	s.annotationsLock.RLock()
   236  	defer s.annotationsLock.RUnlock()
   237  
   238  	value, exist := s.config.Annotations[key]
   239  	if !exist {
   240  		return "", fmt.Errorf("Annotations key %s does not exist", key)
   241  	}
   242  
   243  	return value, nil
   244  }
   245  
   246  // SetAnnotations sets or adds an annotations
   247  func (s *Sandbox) SetAnnotations(annotations map[string]string) error {
   248  	s.annotationsLock.Lock()
   249  	defer s.annotationsLock.Unlock()
   250  
   251  	for k, v := range annotations {
   252  		s.config.Annotations[k] = v
   253  	}
   254  	return nil
   255  }
   256  
// GetAnnotations returns sandbox's annotations.
//
// NOTE(review): the returned map is the sandbox's own config.Annotations map,
// not a copy, and the read lock is released when this method returns.
// Callers presumably must not mutate it, nor read it while SetAnnotations may
// run concurrently — confirm against callers.
func (s *Sandbox) GetAnnotations() map[string]string {
	s.annotationsLock.RLock()
	defer s.annotationsLock.RUnlock()

	return s.config.Annotations
}
   264  
// GetNetNs returns the path of the network namespace of the current sandbox.
func (s *Sandbox) GetNetNs() string {
	return s.networkNS.NetNsPath
}
   269  
   270  // GetAllContainers returns all containers.
   271  func (s *Sandbox) GetAllContainers() []VCContainer {
   272  	ifa := make([]VCContainer, len(s.containers))
   273  
   274  	i := 0
   275  	for _, v := range s.containers {
   276  		ifa[i] = v
   277  		i++
   278  	}
   279  
   280  	return ifa
   281  }
   282  
   283  // GetContainer returns the container named by the containerID.
   284  func (s *Sandbox) GetContainer(containerID string) VCContainer {
   285  	if c, ok := s.containers[containerID]; ok {
   286  		return c
   287  	}
   288  	return nil
   289  }
   290  
   291  // Release closes the agent connection and removes sandbox from internal list.
   292  func (s *Sandbox) Release() error {
   293  	s.Logger().Info("release sandbox")
   294  	globalSandboxList.removeSandbox(s.id)
   295  	if s.monitor != nil {
   296  		s.monitor.stop()
   297  	}
   298  	s.hypervisor.disconnect()
   299  	return s.agent.disconnect()
   300  }
   301  
   302  func (s *Sandbox) releaseStatelessSandbox() error {
   303  	if s.stateful {
   304  		return nil
   305  	}
   306  
   307  	return s.Release()
   308  }
   309  
   310  // Status gets the status of the sandbox
   311  // TODO: update container status properly, see kata-containers/runtime#253
   312  func (s *Sandbox) Status() SandboxStatus {
   313  	var contStatusList []ContainerStatus
   314  	for _, c := range s.containers {
   315  		rootfs := c.config.RootFs.Source
   316  		if c.config.RootFs.Mounted {
   317  			rootfs = c.config.RootFs.Target
   318  		}
   319  
   320  		contStatusList = append(contStatusList, ContainerStatus{
   321  			ID:          c.id,
   322  			State:       c.state,
   323  			PID:         c.process.Pid,
   324  			StartTime:   c.process.StartTime,
   325  			RootFs:      rootfs,
   326  			Annotations: c.config.Annotations,
   327  		})
   328  	}
   329  
   330  	return SandboxStatus{
   331  		ID:               s.id,
   332  		State:            s.state,
   333  		Hypervisor:       s.config.HypervisorType,
   334  		HypervisorConfig: s.config.HypervisorConfig,
   335  		Agent:            s.config.AgentType,
   336  		ContainersStatus: contStatusList,
   337  		Annotations:      s.config.Annotations,
   338  	}
   339  }
   340  
   341  // Monitor returns a error channel for watcher to watch at
   342  func (s *Sandbox) Monitor() (chan error, error) {
   343  	if s.state.State != types.StateRunning {
   344  		return nil, fmt.Errorf("Sandbox is not running")
   345  	}
   346  
   347  	s.Lock()
   348  	if s.monitor == nil {
   349  		s.monitor = newMonitor(s)
   350  	}
   351  	s.Unlock()
   352  
   353  	return s.monitor.newWatcher()
   354  }
   355  
   356  // WaitProcess waits on a container process and return its exit code
   357  func (s *Sandbox) WaitProcess(containerID, processID string) (int32, error) {
   358  	if s.state.State != types.StateRunning {
   359  		return 0, fmt.Errorf("Sandbox not running")
   360  	}
   361  
   362  	c, err := s.findContainer(containerID)
   363  	if err != nil {
   364  		return 0, err
   365  	}
   366  
   367  	return c.wait(processID)
   368  }
   369  
   370  // SignalProcess sends a signal to a process of a container when all is false.
   371  // When all is true, it sends the signal to all processes of a container.
   372  func (s *Sandbox) SignalProcess(containerID, processID string, signal syscall.Signal, all bool) error {
   373  	if s.state.State != types.StateRunning {
   374  		return fmt.Errorf("Sandbox not running")
   375  	}
   376  
   377  	c, err := s.findContainer(containerID)
   378  	if err != nil {
   379  		return err
   380  	}
   381  
   382  	return c.signalProcess(processID, signal, all)
   383  }
   384  
   385  // WinsizeProcess resizes the tty window of a process
   386  func (s *Sandbox) WinsizeProcess(containerID, processID string, height, width uint32) error {
   387  	if s.state.State != types.StateRunning {
   388  		return fmt.Errorf("Sandbox not running")
   389  	}
   390  
   391  	c, err := s.findContainer(containerID)
   392  	if err != nil {
   393  		return err
   394  	}
   395  
   396  	return c.winsizeProcess(processID, height, width)
   397  }
   398  
   399  // IOStream returns stdin writer, stdout reader and stderr reader of a process
   400  func (s *Sandbox) IOStream(containerID, processID string) (io.WriteCloser, io.Reader, io.Reader, error) {
   401  	if s.state.State != types.StateRunning {
   402  		return nil, nil, nil, fmt.Errorf("Sandbox not running")
   403  	}
   404  
   405  	c, err := s.findContainer(containerID)
   406  	if err != nil {
   407  		return nil, nil, nil, err
   408  	}
   409  
   410  	return c.ioStream(processID)
   411  }
   412  
   413  func createAssets(ctx context.Context, sandboxConfig *SandboxConfig) error {
   414  	span, _ := trace(ctx, "createAssets")
   415  	defer span.Finish()
   416  
   417  	kernel, err := types.NewAsset(sandboxConfig.Annotations, types.KernelAsset)
   418  	if err != nil {
   419  		return err
   420  	}
   421  
   422  	image, err := types.NewAsset(sandboxConfig.Annotations, types.ImageAsset)
   423  	if err != nil {
   424  		return err
   425  	}
   426  
   427  	initrd, err := types.NewAsset(sandboxConfig.Annotations, types.InitrdAsset)
   428  	if err != nil {
   429  		return err
   430  	}
   431  
   432  	if image != nil && initrd != nil {
   433  		return fmt.Errorf("%s and %s cannot be both set", types.ImageAsset, types.InitrdAsset)
   434  	}
   435  
   436  	for _, a := range []*types.Asset{kernel, image, initrd} {
   437  		if err := sandboxConfig.HypervisorConfig.addCustomAsset(a); err != nil {
   438  			return err
   439  		}
   440  	}
   441  
   442  	return nil
   443  }
   444  
   445  func (s *Sandbox) getAndStoreGuestDetails() error {
   446  	guestDetailRes, err := s.agent.getGuestDetails(&grpc.GuestDetailsRequest{
   447  		MemBlockSize:    true,
   448  		MemHotplugProbe: true,
   449  	})
   450  	if err != nil {
   451  		return err
   452  	}
   453  
   454  	if guestDetailRes != nil {
   455  		s.state.GuestMemoryBlockSizeMB = uint32(guestDetailRes.MemBlockSizeBytes >> 20)
   456  		if guestDetailRes.AgentDetails != nil {
   457  			s.seccompSupported = guestDetailRes.AgentDetails.SupportsSeccomp
   458  		}
   459  		s.state.GuestMemoryHotplugProbe = guestDetailRes.SupportMemHotplugProbe
   460  	}
   461  
   462  	return nil
   463  }
   464  
   465  // createSandbox creates a sandbox from a sandbox description, the containers list, the hypervisor
   466  // and the agent passed through the Config structure.
   467  // It will create and store the sandbox structure, and then ask the hypervisor
   468  // to physically create that sandbox i.e. starts a VM for that sandbox to eventually
   469  // be started.
   470  func createSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factory) (*Sandbox, error) {
   471  	span, ctx := trace(ctx, "createSandbox")
   472  	defer span.Finish()
   473  
   474  	if err := createAssets(ctx, &sandboxConfig); err != nil {
   475  		return nil, err
   476  	}
   477  
   478  	s, err := newSandbox(ctx, sandboxConfig, factory)
   479  	if err != nil {
   480  		return nil, err
   481  	}
   482  
   483  	if len(s.config.Experimental) != 0 {
   484  		s.Logger().WithField("features", s.config.Experimental).Infof("Enable experimental features")
   485  	}
   486  
   487  	// Sandbox state has been loaded from storage.
   488  	// If the Stae is not empty, this is a re-creation, i.e.
   489  	// we don't need to talk to the guest's agent, but only
   490  	// want to create the sandbox and its containers in memory.
   491  	if s.state.State != "" {
   492  		return s, nil
   493  	}
   494  
   495  	// Below code path is called only during create, because of earlier check.
   496  	if err := s.agent.createSandbox(s); err != nil {
   497  		return nil, err
   498  	}
   499  
   500  	// Set sandbox state
   501  	if err := s.setSandboxState(types.StateReady); err != nil {
   502  		return nil, err
   503  	}
   504  
   505  	return s, nil
   506  }
   507  
   508  func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factory) (*Sandbox, error) {
   509  	span, ctx := trace(ctx, "newSandbox")
   510  	defer span.Finish()
   511  
   512  	if !sandboxConfig.valid() {
   513  		return nil, fmt.Errorf("Invalid sandbox configuration")
   514  	}
   515  
   516  	agent := newAgent(sandboxConfig.AgentType)
   517  
   518  	hypervisor, err := newHypervisor(sandboxConfig.HypervisorType)
   519  	if err != nil {
   520  		return nil, err
   521  	}
   522  
   523  	s := &Sandbox{
   524  		id:              sandboxConfig.ID,
   525  		factory:         factory,
   526  		hypervisor:      hypervisor,
   527  		agent:           agent,
   528  		config:          &sandboxConfig,
   529  		volumes:         sandboxConfig.Volumes,
   530  		containers:      map[string]*Container{},
   531  		state:           types.SandboxState{BlockIndexMap: make(map[int]struct{})},
   532  		annotationsLock: &sync.RWMutex{},
   533  		wg:              &sync.WaitGroup{},
   534  		shmSize:         sandboxConfig.ShmSize,
   535  		sharePidNs:      sandboxConfig.SharePidNs,
   536  		stateful:        sandboxConfig.Stateful,
   537  		networkNS:       NetworkNamespace{NetNsPath: sandboxConfig.NetworkConfig.NetNSPath},
   538  		ctx:             ctx,
   539  	}
   540  
   541  	if s.newStore, err = persist.GetDriver(); err != nil || s.newStore == nil {
   542  		return nil, fmt.Errorf("failed to get fs persist driver: %v", err)
   543  	}
   544  
   545  	if err = globalSandboxList.addSandbox(s); err != nil {
   546  		return nil, err
   547  	}
   548  
   549  	defer func() {
   550  		if err != nil {
   551  			s.Logger().WithError(err).WithField("sandboxid", s.id).Error("Create new sandbox failed")
   552  			globalSandboxList.removeSandbox(s.id)
   553  			s.newStore.Destroy(s.id)
   554  		}
   555  	}()
   556  
   557  	spec := s.GetPatchedOCISpec()
   558  	if spec != nil && spec.Process.SelinuxLabel != "" {
   559  		sandboxConfig.HypervisorConfig.SELinuxProcessLabel = spec.Process.SelinuxLabel
   560  	}
   561  
   562  	if useOldStore(ctx) {
   563  		vcStore, err := store.NewVCSandboxStore(ctx, s.id)
   564  		if err != nil {
   565  			return nil, err
   566  		}
   567  
   568  		s.store = vcStore
   569  
   570  		// Fetch sandbox network to be able to access it from the sandbox structure.
   571  		var networkNS NetworkNamespace
   572  		if err = s.store.Load(store.Network, &networkNS); err == nil {
   573  			s.networkNS = networkNS
   574  		}
   575  
   576  		devices, err := s.store.LoadDevices()
   577  		if err != nil {
   578  			s.Logger().WithError(err).WithField("sandboxid", s.id).Warning("load sandbox devices failed")
   579  		}
   580  		s.devManager = deviceManager.NewDeviceManager(sandboxConfig.HypervisorConfig.BlockDeviceDriver,
   581  			sandboxConfig.HypervisorConfig.EnableVhostUserStore,
   582  			sandboxConfig.HypervisorConfig.VhostUserStorePath, devices)
   583  
   584  		// Load sandbox state. The hypervisor.createSandbox call, may need to access statei.
   585  		state, err := s.store.LoadState()
   586  		if err == nil {
   587  			s.state = state
   588  		}
   589  
   590  		if err = s.hypervisor.createSandbox(ctx, s.id, s.networkNS, &sandboxConfig.HypervisorConfig, s.stateful); err != nil {
   591  			return nil, err
   592  		}
   593  	} else {
   594  		s.devManager = deviceManager.NewDeviceManager(sandboxConfig.HypervisorConfig.BlockDeviceDriver,
   595  			sandboxConfig.HypervisorConfig.EnableVhostUserStore,
   596  			sandboxConfig.HypervisorConfig.VhostUserStorePath, nil)
   597  
   598  		// Ignore the error. Restore can fail for a new sandbox
   599  		if err := s.Restore(); err != nil {
   600  			s.Logger().WithError(err).Debug("restore sandbox failed")
   601  		}
   602  
   603  		// new store doesn't require hypervisor to be stored immediately
   604  		if err = s.hypervisor.createSandbox(ctx, s.id, s.networkNS, &sandboxConfig.HypervisorConfig, s.stateful); err != nil {
   605  			return nil, err
   606  		}
   607  	}
   608  
   609  	if err := s.createCgroupManager(); err != nil {
   610  		return nil, err
   611  	}
   612  
   613  	agentConfig, err := newAgentConfig(sandboxConfig.AgentType, sandboxConfig.AgentConfig)
   614  	if err != nil {
   615  		return nil, err
   616  	}
   617  
   618  	if s.disableVMShutdown, err = s.agent.init(ctx, s, agentConfig); err != nil {
   619  		return nil, err
   620  	}
   621  
   622  	return s, nil
   623  }
   624  
   625  func (s *Sandbox) createCgroupManager() error {
   626  	var err error
   627  	cgroupPath := ""
   628  
   629  	// Do not change current cgroup configuration.
   630  	// Create a spec without constraints
   631  	resources := specs.LinuxResources{}
   632  
   633  	if s.config == nil {
   634  		return fmt.Errorf("Could not create cgroup manager: empty sandbox configuration")
   635  	}
   636  
   637  	spec := s.GetPatchedOCISpec()
   638  	if spec != nil {
   639  		cgroupPath = spec.Linux.CgroupsPath
   640  
   641  		// Kata relies on the cgroup parent created and configured by the container
   642  		// engine, but sometimes the sandbox cgroup is not configured and the container
   643  		// may have access to all the resources, hence the runtime must constrain the
   644  		// sandbox and update the list of devices with the devices hotplugged in the
   645  		// hypervisor.
   646  		resources = *spec.Linux.Resources
   647  	}
   648  
   649  	if s.devManager != nil {
   650  		for _, d := range s.devManager.GetAllDevices() {
   651  			dev, err := vccgroups.DeviceToLinuxDevice(d.GetHostPath())
   652  			if err != nil {
   653  				s.Logger().WithError(err).WithField("device", d.GetHostPath()).Warn("Could not add device to sandbox resources")
   654  				continue
   655  			}
   656  			resources.Devices = append(resources.Devices, dev)
   657  		}
   658  	}
   659  
   660  	// Create the cgroup manager, this way it can be used later
   661  	// to create or detroy cgroups
   662  	if s.cgroupMgr, err = vccgroups.New(
   663  		&vccgroups.Config{
   664  			Cgroups:     s.config.Cgroups,
   665  			CgroupPaths: s.state.CgroupPaths,
   666  			Resources:   resources,
   667  			CgroupPath:  cgroupPath,
   668  		},
   669  	); err != nil {
   670  		return err
   671  	}
   672  
   673  	return nil
   674  }
   675  
   676  // storeSandbox stores a sandbox config.
   677  func (s *Sandbox) storeSandbox() error {
   678  	span, _ := s.trace("storeSandbox")
   679  	defer span.Finish()
   680  
   681  	// flush data to storage
   682  	if err := s.Save(); err != nil {
   683  		return err
   684  	}
   685  	return nil
   686  }
   687  
   688  func rLockSandbox(sandboxID string) (func() error, error) {
   689  	store, err := persist.GetDriver()
   690  	if err != nil {
   691  		return nil, fmt.Errorf("failed to get fs persist driver: %v", err)
   692  	}
   693  
   694  	return store.Lock(sandboxID, false)
   695  }
   696  
   697  func rwLockSandbox(sandboxID string) (func() error, error) {
   698  	store, err := persist.GetDriver()
   699  	if err != nil {
   700  		return nil, fmt.Errorf("failed to get fs persist driver: %v", err)
   701  	}
   702  
   703  	return store.Lock(sandboxID, true)
   704  }
   705  
   706  // fetchSandbox fetches a sandbox config from a sandbox ID and returns a sandbox.
   707  func fetchSandbox(ctx context.Context, sandboxID string) (sandbox *Sandbox, err error) {
   708  	virtLog.Info("fetch sandbox")
   709  	if sandboxID == "" {
   710  		return nil, vcTypes.ErrNeedSandboxID
   711  	}
   712  
   713  	sandbox, err = globalSandboxList.lookupSandbox(sandboxID)
   714  	if sandbox != nil && err == nil {
   715  		return sandbox, err
   716  	}
   717  
   718  	var config SandboxConfig
   719  
   720  	// Try to load sandbox config from old store at first.
   721  	c, ctx, err := loadSandboxConfigFromOldStore(ctx, sandboxID)
   722  	if err != nil {
   723  		virtLog.Warningf("failed to get sandbox config from old store: %v", err)
   724  		// If we failed to load sandbox config from old store, try again with new store.
   725  		c, err = loadSandboxConfig(sandboxID)
   726  		if err != nil {
   727  			virtLog.Warningf("failed to get sandbox config from new store: %v", err)
   728  			return nil, err
   729  		}
   730  	}
   731  	config = *c
   732  
   733  	if useOldStore(ctx) {
   734  		virtLog.Infof("Warning: old store has been deprecated.")
   735  	}
   736  	// fetchSandbox is not suppose to create new sandbox VM.
   737  	sandbox, err = createSandbox(ctx, config, nil)
   738  	if err != nil {
   739  		return nil, fmt.Errorf("failed to create sandbox with config %+v: %v", config, err)
   740  	}
   741  
   742  	// This sandbox already exists, we don't need to recreate the containers in the guest.
   743  	// We only need to fetch the containers from storage and create the container structs.
   744  	if err := sandbox.fetchContainers(); err != nil {
   745  		return nil, err
   746  	}
   747  
   748  	return sandbox, nil
   749  }
   750  
   751  // findContainer returns a container from the containers list held by the
   752  // sandbox structure, based on a container ID.
   753  func (s *Sandbox) findContainer(containerID string) (*Container, error) {
   754  	if s == nil {
   755  		return nil, vcTypes.ErrNeedSandbox
   756  	}
   757  
   758  	if containerID == "" {
   759  		return nil, vcTypes.ErrNeedContainerID
   760  	}
   761  
   762  	if c, ok := s.containers[containerID]; ok {
   763  		return c, nil
   764  	}
   765  
   766  	return nil, errors.Wrapf(vcTypes.ErrNoSuchContainer, "Could not find the container %q from the sandbox %q containers list",
   767  		containerID, s.id)
   768  }
   769  
   770  // removeContainer removes a container from the containers list held by the
   771  // sandbox structure, based on a container ID.
   772  func (s *Sandbox) removeContainer(containerID string) error {
   773  	if s == nil {
   774  		return vcTypes.ErrNeedSandbox
   775  	}
   776  
   777  	if containerID == "" {
   778  		return vcTypes.ErrNeedContainerID
   779  	}
   780  
   781  	if _, ok := s.containers[containerID]; !ok {
   782  		return errors.Wrapf(vcTypes.ErrNoSuchContainer, "Could not remove the container %q from the sandbox %q containers list",
   783  			containerID, s.id)
   784  	}
   785  
   786  	delete(s.containers, containerID)
   787  
   788  	return nil
   789  }
   790  
   791  // Delete deletes an already created sandbox.
   792  // The VM in which the sandbox is running will be shut down.
   793  func (s *Sandbox) Delete() error {
   794  	if s.state.State != types.StateReady &&
   795  		s.state.State != types.StatePaused &&
   796  		s.state.State != types.StateStopped {
   797  		return fmt.Errorf("Sandbox not ready, paused or stopped, impossible to delete")
   798  	}
   799  
   800  	for _, c := range s.containers {
   801  		if err := c.delete(); err != nil {
   802  			return err
   803  		}
   804  	}
   805  
   806  	if !rootless.IsRootless() {
   807  		if err := s.cgroupsDelete(); err != nil {
   808  			return err
   809  		}
   810  	}
   811  
   812  	globalSandboxList.removeSandbox(s.id)
   813  
   814  	if s.monitor != nil {
   815  		s.monitor.stop()
   816  	}
   817  
   818  	if err := s.hypervisor.cleanup(); err != nil {
   819  		s.Logger().WithError(err).Error("failed to cleanup hypervisor")
   820  	}
   821  
   822  	s.agent.cleanup(s)
   823  
   824  	return s.newStore.Destroy(s.id)
   825  }
   826  
   827  func (s *Sandbox) startNetworkMonitor() error {
   828  	span, _ := s.trace("startNetworkMonitor")
   829  	defer span.Finish()
   830  
   831  	binPath, err := os.Executable()
   832  	if err != nil {
   833  		return err
   834  	}
   835  
   836  	logLevel := "info"
   837  	if s.config.NetworkConfig.NetmonConfig.Debug {
   838  		logLevel = "debug"
   839  	}
   840  
   841  	params := netmonParams{
   842  		netmonPath: s.config.NetworkConfig.NetmonConfig.Path,
   843  		debug:      s.config.NetworkConfig.NetmonConfig.Debug,
   844  		logLevel:   logLevel,
   845  		runtime:    binPath,
   846  		sandboxID:  s.id,
   847  	}
   848  
   849  	return s.network.Run(s.networkNS.NetNsPath, func() error {
   850  		pid, err := startNetmon(params)
   851  		if err != nil {
   852  			return err
   853  		}
   854  
   855  		s.networkNS.NetmonPID = pid
   856  
   857  		return nil
   858  	})
   859  }
   860  
   861  func (s *Sandbox) createNetwork() error {
   862  	if s.config.NetworkConfig.DisableNewNetNs ||
   863  		s.config.NetworkConfig.NetNSPath == "" {
   864  		return nil
   865  	}
   866  
   867  	span, _ := s.trace("createNetwork")
   868  	defer span.Finish()
   869  
   870  	s.networkNS = NetworkNamespace{
   871  		NetNsPath:    s.config.NetworkConfig.NetNSPath,
   872  		NetNsCreated: s.config.NetworkConfig.NetNsCreated,
   873  	}
   874  
   875  	// In case there is a factory, network interfaces are hotplugged
   876  	// after vm is started.
   877  	if s.factory == nil {
   878  		// Add the network
   879  		endpoints, err := s.network.Add(s.ctx, &s.config.NetworkConfig, s.hypervisor, false)
   880  		if err != nil {
   881  			return err
   882  		}
   883  
   884  		s.networkNS.Endpoints = endpoints
   885  
   886  		if s.config.NetworkConfig.NetmonConfig.Enable {
   887  			if err := s.startNetworkMonitor(); err != nil {
   888  				return err
   889  			}
   890  		}
   891  	}
   892  	return nil
   893  }
   894  
// postCreatedNetwork calls the network's PostAdd with the sandbox context and
// network namespace. The last argument is true when the sandbox has a factory.
func (s *Sandbox) postCreatedNetwork() error {

	return s.network.PostAdd(s.ctx, &s.networkNS, s.factory != nil)
}
   899  
   900  func (s *Sandbox) removeNetwork() error {
   901  	span, _ := s.trace("removeNetwork")
   902  	defer span.Finish()
   903  
   904  	if s.config.NetworkConfig.NetmonConfig.Enable {
   905  		if err := stopNetmon(s.networkNS.NetmonPID); err != nil {
   906  			return err
   907  		}
   908  	}
   909  
   910  	return s.network.Remove(s.ctx, &s.networkNS, s.hypervisor)
   911  }
   912  
   913  func (s *Sandbox) generateNetInfo(inf *vcTypes.Interface) (NetworkInfo, error) {
   914  	hw, err := net.ParseMAC(inf.HwAddr)
   915  	if err != nil {
   916  		return NetworkInfo{}, err
   917  	}
   918  
   919  	var addrs []netlink.Addr
   920  	for _, addr := range inf.IPAddresses {
   921  		netlinkAddrStr := fmt.Sprintf("%s/%s", addr.Address, addr.Mask)
   922  		netlinkAddr, err := netlink.ParseAddr(netlinkAddrStr)
   923  		if err != nil {
   924  			return NetworkInfo{}, fmt.Errorf("could not parse %q: %v", netlinkAddrStr, err)
   925  		}
   926  
   927  		addrs = append(addrs, *netlinkAddr)
   928  	}
   929  
   930  	return NetworkInfo{
   931  		Iface: NetlinkIface{
   932  			LinkAttrs: netlink.LinkAttrs{
   933  				Name:         inf.Name,
   934  				HardwareAddr: hw,
   935  				MTU:          int(inf.Mtu),
   936  			},
   937  			Type: inf.LinkType,
   938  		},
   939  		Addrs: addrs,
   940  	}, nil
   941  }
   942  
   943  // AddInterface adds new nic to the sandbox.
   944  func (s *Sandbox) AddInterface(inf *vcTypes.Interface) (*vcTypes.Interface, error) {
   945  	netInfo, err := s.generateNetInfo(inf)
   946  	if err != nil {
   947  		return nil, err
   948  	}
   949  
   950  	endpoint, err := createEndpoint(netInfo, len(s.networkNS.Endpoints), s.config.NetworkConfig.InterworkingModel, nil)
   951  	if err != nil {
   952  		return nil, err
   953  	}
   954  
   955  	endpoint.SetProperties(netInfo)
   956  	if err := doNetNS(s.networkNS.NetNsPath, func(_ ns.NetNS) error {
   957  		s.Logger().WithField("endpoint-type", endpoint.Type()).Info("Hot attaching endpoint")
   958  		return endpoint.HotAttach(s.hypervisor)
   959  	}); err != nil {
   960  		return nil, err
   961  	}
   962  
   963  	// Update the sandbox storage
   964  	s.networkNS.Endpoints = append(s.networkNS.Endpoints, endpoint)
   965  	if err := s.Save(); err != nil {
   966  		return nil, err
   967  	}
   968  
   969  	// Add network for vm
   970  	inf.PciAddr = endpoint.PciAddr()
   971  	return s.agent.updateInterface(inf)
   972  }
   973  
   974  // RemoveInterface removes a nic of the sandbox.
   975  func (s *Sandbox) RemoveInterface(inf *vcTypes.Interface) (*vcTypes.Interface, error) {
   976  	for i, endpoint := range s.networkNS.Endpoints {
   977  		if endpoint.HardwareAddr() == inf.HwAddr {
   978  			s.Logger().WithField("endpoint-type", endpoint.Type()).Info("Hot detaching endpoint")
   979  			if err := endpoint.HotDetach(s.hypervisor, s.networkNS.NetNsCreated, s.networkNS.NetNsPath); err != nil {
   980  				return inf, err
   981  			}
   982  			s.networkNS.Endpoints = append(s.networkNS.Endpoints[:i], s.networkNS.Endpoints[i+1:]...)
   983  
   984  			if err := s.Save(); err != nil {
   985  				return inf, err
   986  			}
   987  
   988  			break
   989  		}
   990  	}
   991  	return nil, nil
   992  }
   993  
   994  // ListInterfaces lists all nics and their configurations in the sandbox.
   995  func (s *Sandbox) ListInterfaces() ([]*vcTypes.Interface, error) {
   996  	return s.agent.listInterfaces()
   997  }
   998  
   999  // UpdateRoutes updates the sandbox route table (e.g. for portmapping support).
  1000  func (s *Sandbox) UpdateRoutes(routes []*vcTypes.Route) ([]*vcTypes.Route, error) {
  1001  	return s.agent.updateRoutes(routes)
  1002  }
  1003  
  1004  // ListRoutes lists all routes and their configurations in the sandbox.
  1005  func (s *Sandbox) ListRoutes() ([]*vcTypes.Route, error) {
  1006  	return s.agent.listRoutes()
  1007  }
  1008  
  1009  // startVM starts the VM.
  1010  func (s *Sandbox) startVM() (err error) {
  1011  	span, ctx := s.trace("startVM")
  1012  	defer span.Finish()
  1013  
  1014  	s.Logger().Info("Starting VM")
  1015  
  1016  	if err := s.network.Run(s.networkNS.NetNsPath, func() error {
  1017  		if s.factory != nil {
  1018  			vm, err := s.factory.GetVM(ctx, VMConfig{
  1019  				HypervisorType:   s.config.HypervisorType,
  1020  				HypervisorConfig: s.config.HypervisorConfig,
  1021  				AgentType:        s.config.AgentType,
  1022  				AgentConfig:      s.config.AgentConfig,
  1023  				ProxyType:        s.config.ProxyType,
  1024  				ProxyConfig:      s.config.ProxyConfig,
  1025  			})
  1026  			if err != nil {
  1027  				return err
  1028  			}
  1029  
  1030  			return vm.assignSandbox(s)
  1031  		}
  1032  
  1033  		return s.hypervisor.startSandbox(vmStartTimeout)
  1034  	}); err != nil {
  1035  		return err
  1036  	}
  1037  
  1038  	defer func() {
  1039  		if err != nil {
  1040  			s.hypervisor.stopSandbox()
  1041  		}
  1042  	}()
  1043  
  1044  	// In case of vm factory, network interfaces are hotplugged
  1045  	// after vm is started.
  1046  	if s.factory != nil {
  1047  		endpoints, err := s.network.Add(s.ctx, &s.config.NetworkConfig, s.hypervisor, true)
  1048  		if err != nil {
  1049  			return err
  1050  		}
  1051  
  1052  		s.networkNS.Endpoints = endpoints
  1053  
  1054  		if s.config.NetworkConfig.NetmonConfig.Enable {
  1055  			if err := s.startNetworkMonitor(); err != nil {
  1056  				return err
  1057  			}
  1058  		}
  1059  	}
  1060  
  1061  	s.Logger().Info("VM started")
  1062  
  1063  	// Once the hypervisor is done starting the sandbox,
  1064  	// we want to guarantee that it is manageable.
  1065  	// For that we need to ask the agent to start the
  1066  	// sandbox inside the VM.
  1067  	if err := s.agent.startSandbox(s); err != nil {
  1068  		return err
  1069  	}
  1070  
  1071  	s.Logger().Info("Agent started in the sandbox")
  1072  
  1073  	return nil
  1074  }
  1075  
  1076  // stopVM: stop the sandbox's VM
  1077  func (s *Sandbox) stopVM() error {
  1078  	span, _ := s.trace("stopVM")
  1079  	defer span.Finish()
  1080  
  1081  	s.Logger().Info("Stopping sandbox in the VM")
  1082  	if err := s.agent.stopSandbox(s); err != nil {
  1083  		s.Logger().WithError(err).WithField("sandboxid", s.id).Warning("Agent did not stop sandbox")
  1084  	}
  1085  
  1086  	if s.disableVMShutdown {
  1087  		// Do not kill the VM - allow the agent to shut it down
  1088  		// (only used to support static agent tracing).
  1089  		return nil
  1090  	}
  1091  
  1092  	s.Logger().Info("Stopping VM")
  1093  	return s.hypervisor.stopSandbox()
  1094  }
  1095  
  1096  func (s *Sandbox) addContainer(c *Container) error {
  1097  	if _, ok := s.containers[c.id]; ok {
  1098  		return fmt.Errorf("Duplicated container: %s", c.id)
  1099  	}
  1100  	s.containers[c.id] = c
  1101  
  1102  	return nil
  1103  }
  1104  
  1105  // newContainers creates new containers structure and
  1106  // adds them to the sandbox. It does not create the containers
  1107  // in the guest. This should only be used when fetching a
  1108  // sandbox that already exists.
  1109  func (s *Sandbox) fetchContainers() error {
  1110  	for i, contConfig := range s.config.Containers {
  1111  		// Add spec from bundle path
  1112  		spec, err := compatoci.GetContainerSpec(contConfig.Annotations)
  1113  		if err != nil {
  1114  			return err
  1115  		}
  1116  		contConfig.CustomSpec = &spec
  1117  		s.config.Containers[i] = contConfig
  1118  
  1119  		c, err := newContainer(s, &s.config.Containers[i])
  1120  		if err != nil {
  1121  			return err
  1122  		}
  1123  
  1124  		if err := s.addContainer(c); err != nil {
  1125  			return err
  1126  		}
  1127  	}
  1128  
  1129  	return nil
  1130  }
  1131  
  1132  // CreateContainer creates a new container in the sandbox
  1133  // This should be called only when the sandbox is already created.
  1134  // It will add new container config to sandbox.config.Containers
  1135  func (s *Sandbox) CreateContainer(contConfig ContainerConfig) (VCContainer, error) {
  1136  	// Create the container.
  1137  	c, err := newContainer(s, &contConfig)
  1138  	if err != nil {
  1139  		return nil, err
  1140  	}
  1141  
  1142  	// Update sandbox config.
  1143  	s.config.Containers = append(s.config.Containers, contConfig)
  1144  
  1145  	defer func() {
  1146  		if err != nil {
  1147  			if len(s.config.Containers) > 0 {
  1148  				// delete container config
  1149  				s.config.Containers = s.config.Containers[:len(s.config.Containers)-1]
  1150  			}
  1151  		}
  1152  	}()
  1153  
  1154  	err = c.create()
  1155  	if err != nil {
  1156  		return nil, err
  1157  	}
  1158  
  1159  	// Add the container to the containers list in the sandbox.
  1160  	if err = s.addContainer(c); err != nil {
  1161  		return nil, err
  1162  	}
  1163  
  1164  	defer func() {
  1165  		// Rollback if error happens.
  1166  		if err != nil {
  1167  			s.removeContainer(c.id)
  1168  		}
  1169  	}()
  1170  
  1171  	// Sandbox is reponsable to update VM resources needed by Containers
  1172  	// Update resources after having added containers to the sandbox, since
  1173  	// container status is requiered to know if more resources should be added.
  1174  	err = s.updateResources()
  1175  	if err != nil {
  1176  		return nil, err
  1177  	}
  1178  
  1179  	if err = s.cgroupsUpdate(); err != nil {
  1180  		return nil, err
  1181  	}
  1182  
  1183  	if err = s.storeSandbox(); err != nil {
  1184  		return nil, err
  1185  	}
  1186  
  1187  	return c, nil
  1188  }
  1189  
  1190  // StartContainer starts a container in the sandbox
  1191  func (s *Sandbox) StartContainer(containerID string) (VCContainer, error) {
  1192  	// Fetch the container.
  1193  	c, err := s.findContainer(containerID)
  1194  	if err != nil {
  1195  		return nil, err
  1196  	}
  1197  
  1198  	// Start it.
  1199  	err = c.start()
  1200  	if err != nil {
  1201  		return nil, err
  1202  	}
  1203  
  1204  	if err = s.storeSandbox(); err != nil {
  1205  		return nil, err
  1206  	}
  1207  
  1208  	s.Logger().Info("Container is started")
  1209  
  1210  	// Update sandbox resources in case a stopped container
  1211  	// is started
  1212  	err = s.updateResources()
  1213  	if err != nil {
  1214  		return nil, err
  1215  	}
  1216  
  1217  	return c, nil
  1218  }
  1219  
  1220  // StopContainer stops a container in the sandbox
  1221  func (s *Sandbox) StopContainer(containerID string, force bool) (VCContainer, error) {
  1222  	// Fetch the container.
  1223  	c, err := s.findContainer(containerID)
  1224  	if err != nil {
  1225  		return nil, err
  1226  	}
  1227  
  1228  	// Stop it.
  1229  	if err := c.stop(force); err != nil {
  1230  		return nil, err
  1231  	}
  1232  
  1233  	if err = s.storeSandbox(); err != nil {
  1234  		return nil, err
  1235  	}
  1236  	return c, nil
  1237  }
  1238  
  1239  // KillContainer signals a container in the sandbox
  1240  func (s *Sandbox) KillContainer(containerID string, signal syscall.Signal, all bool) error {
  1241  	// Fetch the container.
  1242  	c, err := s.findContainer(containerID)
  1243  	if err != nil {
  1244  		return err
  1245  	}
  1246  
  1247  	// Send a signal to the process.
  1248  	err = c.kill(signal, all)
  1249  
  1250  	// SIGKILL should never fail otherwise it is
  1251  	// impossible to clean things up.
  1252  	if signal == syscall.SIGKILL {
  1253  		return nil
  1254  	}
  1255  
  1256  	return err
  1257  }
  1258  
  1259  // DeleteContainer deletes a container from the sandbox
  1260  func (s *Sandbox) DeleteContainer(containerID string) (VCContainer, error) {
  1261  	if containerID == "" {
  1262  		return nil, vcTypes.ErrNeedContainerID
  1263  	}
  1264  
  1265  	// Fetch the container.
  1266  	c, err := s.findContainer(containerID)
  1267  	if err != nil {
  1268  		return nil, err
  1269  	}
  1270  
  1271  	// Delete it.
  1272  	err = c.delete()
  1273  	if err != nil {
  1274  		return nil, err
  1275  	}
  1276  
  1277  	// Update sandbox config
  1278  	for idx, contConfig := range s.config.Containers {
  1279  		if contConfig.ID == containerID {
  1280  			s.config.Containers = append(s.config.Containers[:idx], s.config.Containers[idx+1:]...)
  1281  			break
  1282  		}
  1283  	}
  1284  
  1285  	if err = s.storeSandbox(); err != nil {
  1286  		return nil, err
  1287  	}
  1288  	return c, nil
  1289  }
  1290  
  1291  // ProcessListContainer lists every process running inside a specific
  1292  // container in the sandbox.
  1293  func (s *Sandbox) ProcessListContainer(containerID string, options ProcessListOptions) (ProcessList, error) {
  1294  	// Fetch the container.
  1295  	c, err := s.findContainer(containerID)
  1296  	if err != nil {
  1297  		return nil, err
  1298  	}
  1299  
  1300  	// Get the process list related to the container.
  1301  	return c.processList(options)
  1302  }
  1303  
  1304  // StatusContainer gets the status of a container
  1305  // TODO: update container status properly, see kata-containers/runtime#253
  1306  func (s *Sandbox) StatusContainer(containerID string) (ContainerStatus, error) {
  1307  	if containerID == "" {
  1308  		return ContainerStatus{}, vcTypes.ErrNeedContainerID
  1309  	}
  1310  
  1311  	if c, ok := s.containers[containerID]; ok {
  1312  		rootfs := c.config.RootFs.Source
  1313  		if c.config.RootFs.Mounted {
  1314  			rootfs = c.config.RootFs.Target
  1315  		}
  1316  
  1317  		return ContainerStatus{
  1318  			ID:          c.id,
  1319  			State:       c.state,
  1320  			PID:         c.process.Pid,
  1321  			StartTime:   c.process.StartTime,
  1322  			RootFs:      rootfs,
  1323  			Annotations: c.config.Annotations,
  1324  		}, nil
  1325  	}
  1326  
  1327  	return ContainerStatus{}, vcTypes.ErrNoSuchContainer
  1328  }
  1329  
  1330  // EnterContainer is the virtcontainers container command execution entry point.
  1331  // EnterContainer enters an already running container and runs a given command.
  1332  func (s *Sandbox) EnterContainer(containerID string, cmd types.Cmd) (VCContainer, *Process, error) {
  1333  	// Fetch the container.
  1334  	c, err := s.findContainer(containerID)
  1335  	if err != nil {
  1336  		return nil, nil, err
  1337  	}
  1338  
  1339  	// Enter it.
  1340  	process, err := c.enter(cmd)
  1341  	if err != nil {
  1342  		return nil, nil, err
  1343  	}
  1344  
  1345  	return c, process, nil
  1346  }
  1347  
  1348  // UpdateContainer update a running container.
  1349  func (s *Sandbox) UpdateContainer(containerID string, resources specs.LinuxResources) error {
  1350  	// Fetch the container.
  1351  	c, err := s.findContainer(containerID)
  1352  	if err != nil {
  1353  		return err
  1354  	}
  1355  
  1356  	err = c.update(resources)
  1357  	if err != nil {
  1358  		return err
  1359  	}
  1360  
  1361  	if err := s.cgroupsUpdate(); err != nil {
  1362  		return err
  1363  	}
  1364  
  1365  	if err = s.storeSandbox(); err != nil {
  1366  		return err
  1367  	}
  1368  	return nil
  1369  }
  1370  
  1371  // StatsContainer return the stats of a running container
  1372  func (s *Sandbox) StatsContainer(containerID string) (ContainerStats, error) {
  1373  	// Fetch the container.
  1374  	c, err := s.findContainer(containerID)
  1375  	if err != nil {
  1376  		return ContainerStats{}, err
  1377  	}
  1378  
  1379  	stats, err := c.stats()
  1380  	if err != nil {
  1381  		return ContainerStats{}, err
  1382  	}
  1383  	return *stats, nil
  1384  }
  1385  
  1386  // Stats returns the stats of a running sandbox
  1387  func (s *Sandbox) Stats() (SandboxStats, error) {
  1388  	if s.state.CgroupPath == "" {
  1389  		return SandboxStats{}, fmt.Errorf("sandbox cgroup path is empty")
  1390  	}
  1391  
  1392  	var path string
  1393  	var cgroupSubsystems cgroups.Hierarchy
  1394  
  1395  	if s.config.SandboxCgroupOnly {
  1396  		cgroupSubsystems = cgroups.V1
  1397  		path = s.state.CgroupPath
  1398  	} else {
  1399  		cgroupSubsystems = V1NoConstraints
  1400  		path = cgroupNoConstraintsPath(s.state.CgroupPath)
  1401  	}
  1402  
  1403  	cgroup, err := cgroupsLoadFunc(cgroupSubsystems, cgroups.StaticPath(path))
  1404  	if err != nil {
  1405  		return SandboxStats{}, fmt.Errorf("Could not load sandbox cgroup in %v: %v", s.state.CgroupPath, err)
  1406  	}
  1407  
  1408  	metrics, err := cgroup.Stat(cgroups.ErrorHandler(cgroups.IgnoreNotExist))
  1409  	if err != nil {
  1410  		return SandboxStats{}, err
  1411  	}
  1412  
  1413  	stats := SandboxStats{}
  1414  
  1415  	stats.CgroupStats.CPUStats.CPUUsage.TotalUsage = metrics.CPU.Usage.Total
  1416  	stats.CgroupStats.MemoryStats.Usage.Usage = metrics.Memory.Usage.Usage
  1417  	tids, err := s.hypervisor.getThreadIDs()
  1418  	if err != nil {
  1419  		return stats, err
  1420  	}
  1421  	stats.Cpus = len(tids.vcpus)
  1422  
  1423  	return stats, nil
  1424  }
  1425  
  1426  // PauseContainer pauses a running container.
  1427  func (s *Sandbox) PauseContainer(containerID string) error {
  1428  	// Fetch the container.
  1429  	c, err := s.findContainer(containerID)
  1430  	if err != nil {
  1431  		return err
  1432  	}
  1433  
  1434  	// Pause the container.
  1435  	if err := c.pause(); err != nil {
  1436  		return err
  1437  	}
  1438  
  1439  	if err = s.storeSandbox(); err != nil {
  1440  		return err
  1441  	}
  1442  	return nil
  1443  }
  1444  
  1445  // ResumeContainer resumes a paused container.
  1446  func (s *Sandbox) ResumeContainer(containerID string) error {
  1447  	// Fetch the container.
  1448  	c, err := s.findContainer(containerID)
  1449  	if err != nil {
  1450  		return err
  1451  	}
  1452  
  1453  	// Resume the container.
  1454  	if err := c.resume(); err != nil {
  1455  		return err
  1456  	}
  1457  
  1458  	if err = s.storeSandbox(); err != nil {
  1459  		return err
  1460  	}
  1461  	return nil
  1462  }
  1463  
  1464  // createContainers registers all containers to the proxy, create the
  1465  // containers in the guest and starts one shim per container.
  1466  func (s *Sandbox) createContainers() error {
  1467  	span, _ := s.trace("createContainers")
  1468  	defer span.Finish()
  1469  
  1470  	for _, contConfig := range s.config.Containers {
  1471  
  1472  		c, err := newContainer(s, &contConfig)
  1473  		if err != nil {
  1474  			return err
  1475  		}
  1476  		if err := c.create(); err != nil {
  1477  			return err
  1478  		}
  1479  
  1480  		if err := s.addContainer(c); err != nil {
  1481  			return err
  1482  		}
  1483  	}
  1484  
  1485  	// Update resources after having added containers to the sandbox, since
  1486  	// container status is requiered to know if more resources should be added.
  1487  	if err := s.updateResources(); err != nil {
  1488  		return err
  1489  	}
  1490  
  1491  	if err := s.cgroupsUpdate(); err != nil {
  1492  		return err
  1493  	}
  1494  	if err := s.storeSandbox(); err != nil {
  1495  		return err
  1496  	}
  1497  
  1498  	return nil
  1499  }
  1500  
  1501  // Start starts a sandbox. The containers that are making the sandbox
  1502  // will be started.
  1503  func (s *Sandbox) Start() error {
  1504  	if err := s.state.ValidTransition(s.state.State, types.StateRunning); err != nil {
  1505  		return err
  1506  	}
  1507  
  1508  	prevState := s.state.State
  1509  
  1510  	if err := s.setSandboxState(types.StateRunning); err != nil {
  1511  		return err
  1512  	}
  1513  
  1514  	var startErr error
  1515  	defer func() {
  1516  		if startErr != nil {
  1517  			s.setSandboxState(prevState)
  1518  		}
  1519  	}()
  1520  	for _, c := range s.containers {
  1521  		if startErr = c.start(); startErr != nil {
  1522  			return startErr
  1523  		}
  1524  	}
  1525  
  1526  	if err := s.storeSandbox(); err != nil {
  1527  		return err
  1528  	}
  1529  
  1530  	s.Logger().Info("Sandbox is started")
  1531  
  1532  	return nil
  1533  }
  1534  
  1535  // Stop stops a sandbox. The containers that are making the sandbox
  1536  // will be destroyed.
  1537  // When force is true, ignore guest related stop failures.
  1538  func (s *Sandbox) Stop(force bool) error {
  1539  	span, _ := s.trace("stop")
  1540  	defer span.Finish()
  1541  
  1542  	if s.state.State == types.StateStopped {
  1543  		s.Logger().Info("sandbox already stopped")
  1544  		return nil
  1545  	}
  1546  
  1547  	if err := s.state.ValidTransition(s.state.State, types.StateStopped); err != nil {
  1548  		return err
  1549  	}
  1550  
  1551  	for _, c := range s.containers {
  1552  		if err := c.stop(force); err != nil {
  1553  			return err
  1554  		}
  1555  	}
  1556  
  1557  	if err := s.stopVM(); err != nil && !force {
  1558  		return err
  1559  	}
  1560  
  1561  	if err := s.setSandboxState(types.StateStopped); err != nil {
  1562  		return err
  1563  	}
  1564  
  1565  	// Remove the network.
  1566  	if err := s.removeNetwork(); err != nil && !force {
  1567  		return err
  1568  	}
  1569  
  1570  	if err := s.storeSandbox(); err != nil {
  1571  		return err
  1572  	}
  1573  
  1574  	return nil
  1575  }
  1576  
  1577  // list lists all sandbox running on the host.
  1578  func (s *Sandbox) list() ([]Sandbox, error) {
  1579  	return nil, nil
  1580  }
  1581  
  1582  // enter runs an executable within a sandbox.
  1583  func (s *Sandbox) enter(args []string) error {
  1584  	return nil
  1585  }
  1586  
  1587  // setSandboxState sets both the in-memory and on-disk state of the
  1588  // sandbox.
  1589  func (s *Sandbox) setSandboxState(state types.StateString) error {
  1590  	if state == "" {
  1591  		return vcTypes.ErrNeedState
  1592  	}
  1593  
  1594  	// update in-memory state
  1595  	s.state.State = state
  1596  
  1597  	if useOldStore(s.ctx) {
  1598  		return s.store.Store(store.State, s.state)
  1599  	}
  1600  	return nil
  1601  }
  1602  
  1603  const maxBlockIndex = 65535
  1604  
  1605  // getAndSetSandboxBlockIndex retrieves an unused sandbox block index from
  1606  // the BlockIndexMap and marks it as used. This index is used to maintain the
  1607  // index at which a block device is assigned to a container in the sandbox.
  1608  func (s *Sandbox) getAndSetSandboxBlockIndex() (int, error) {
  1609  	var err error
  1610  	currentIndex := -1
  1611  	for i := 0; i < maxBlockIndex; i++ {
  1612  		if _, ok := s.state.BlockIndexMap[i]; !ok {
  1613  			currentIndex = i
  1614  			break
  1615  		}
  1616  	}
  1617  	if currentIndex == -1 {
  1618  		return -1, errors.New("no available block index")
  1619  	}
  1620  	s.state.BlockIndexMap[currentIndex] = struct{}{}
  1621  	defer func() {
  1622  		if err != nil {
  1623  			delete(s.state.BlockIndexMap, currentIndex)
  1624  		}
  1625  	}()
  1626  
  1627  	return currentIndex, nil
  1628  }
  1629  
  1630  // unsetSandboxBlockIndex deletes the current sandbox block index from BlockIndexMap.
  1631  // This is used to recover from failure while adding a block device.
  1632  func (s *Sandbox) unsetSandboxBlockIndex(index int) error {
  1633  	var err error
  1634  	original := index
  1635  	delete(s.state.BlockIndexMap, index)
  1636  	defer func() {
  1637  		if err != nil {
  1638  			s.state.BlockIndexMap[original] = struct{}{}
  1639  		}
  1640  	}()
  1641  
  1642  	return nil
  1643  }
  1644  
  1645  // HotplugAddDevice is used for add a device to sandbox
  1646  // Sandbox implement DeviceReceiver interface from device/api/interface.go
  1647  func (s *Sandbox) HotplugAddDevice(device api.Device, devType config.DeviceType) error {
  1648  	span, _ := s.trace("HotplugAddDevice")
  1649  	defer span.Finish()
  1650  
  1651  	if s.config.SandboxCgroupOnly {
  1652  		// We are about to add a device to the hypervisor,
  1653  		// the device cgroup MUST be updated since the hypervisor
  1654  		// will need access to such device
  1655  		hdev := device.GetHostPath()
  1656  		if err := s.cgroupMgr.AddDevice(hdev); err != nil {
  1657  			s.Logger().WithError(err).WithField("device", hdev).
  1658  				Warn("Could not add device to cgroup")
  1659  		}
  1660  	}
  1661  
  1662  	switch devType {
  1663  	case config.DeviceVFIO:
  1664  		vfioDevices, ok := device.GetDeviceInfo().([]*config.VFIODev)
  1665  		if !ok {
  1666  			return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
  1667  		}
  1668  
  1669  		// adding a group of VFIO devices
  1670  		for _, dev := range vfioDevices {
  1671  			if _, err := s.hypervisor.hotplugAddDevice(dev, vfioDev); err != nil {
  1672  				s.Logger().
  1673  					WithFields(logrus.Fields{
  1674  						"sandbox":         s.id,
  1675  						"vfio-device-ID":  dev.ID,
  1676  						"vfio-device-BDF": dev.BDF,
  1677  					}).WithError(err).Error("failed to hotplug VFIO device")
  1678  				return err
  1679  			}
  1680  		}
  1681  		return nil
  1682  	case config.DeviceBlock:
  1683  		blockDevice, ok := device.(*drivers.BlockDevice)
  1684  		if !ok {
  1685  			return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
  1686  		}
  1687  		_, err := s.hypervisor.hotplugAddDevice(blockDevice.BlockDrive, blockDev)
  1688  		return err
  1689  	case config.VhostUserBlk:
  1690  		vhostUserBlkDevice, ok := device.(*drivers.VhostUserBlkDevice)
  1691  		if !ok {
  1692  			return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
  1693  		}
  1694  		_, err := s.hypervisor.hotplugAddDevice(vhostUserBlkDevice.VhostUserDeviceAttrs, vhostuserDev)
  1695  		return err
  1696  	case config.DeviceGeneric:
  1697  		// TODO: what?
  1698  		return nil
  1699  	}
  1700  	return nil
  1701  }
  1702  
  1703  // HotplugRemoveDevice is used for removing a device from sandbox
  1704  // Sandbox implement DeviceReceiver interface from device/api/interface.go
  1705  func (s *Sandbox) HotplugRemoveDevice(device api.Device, devType config.DeviceType) error {
  1706  	defer func() {
  1707  		if s.config.SandboxCgroupOnly {
  1708  			// Remove device from cgroup, the hypervisor
  1709  			// should not have access to such device anymore.
  1710  			hdev := device.GetHostPath()
  1711  			if err := s.cgroupMgr.RemoveDevice(hdev); err != nil {
  1712  				s.Logger().WithError(err).WithField("device", hdev).
  1713  					Warn("Could not remove device from cgroup")
  1714  			}
  1715  		}
  1716  	}()
  1717  
  1718  	switch devType {
  1719  	case config.DeviceVFIO:
  1720  		vfioDevices, ok := device.GetDeviceInfo().([]*config.VFIODev)
  1721  		if !ok {
  1722  			return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
  1723  		}
  1724  
  1725  		// remove a group of VFIO devices
  1726  		for _, dev := range vfioDevices {
  1727  			if _, err := s.hypervisor.hotplugRemoveDevice(dev, vfioDev); err != nil {
  1728  				s.Logger().WithError(err).
  1729  					WithFields(logrus.Fields{
  1730  						"sandbox":         s.id,
  1731  						"vfio-device-ID":  dev.ID,
  1732  						"vfio-device-BDF": dev.BDF,
  1733  					}).Error("failed to hot unplug VFIO device")
  1734  				return err
  1735  			}
  1736  		}
  1737  		return nil
  1738  	case config.DeviceBlock:
  1739  		blockDrive, ok := device.GetDeviceInfo().(*config.BlockDrive)
  1740  		if !ok {
  1741  			return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
  1742  		}
  1743  		_, err := s.hypervisor.hotplugRemoveDevice(blockDrive, blockDev)
  1744  		return err
  1745  	case config.VhostUserBlk:
  1746  		vhostUserDeviceAttrs, ok := device.GetDeviceInfo().(*config.VhostUserDeviceAttrs)
  1747  		if !ok {
  1748  			return fmt.Errorf("device type mismatch, expect device type to be %s", devType)
  1749  		}
  1750  		_, err := s.hypervisor.hotplugRemoveDevice(vhostUserDeviceAttrs, vhostuserDev)
  1751  		return err
  1752  	case config.DeviceGeneric:
  1753  		// TODO: what?
  1754  		return nil
  1755  	}
  1756  	return nil
  1757  }
  1758  
  1759  // GetAndSetSandboxBlockIndex is used for getting and setting virtio-block indexes
  1760  // Sandbox implement DeviceReceiver interface from device/api/interface.go
  1761  func (s *Sandbox) GetAndSetSandboxBlockIndex() (int, error) {
  1762  	return s.getAndSetSandboxBlockIndex()
  1763  }
  1764  
  1765  // UnsetSandboxBlockIndex unsets block indexes
  1766  // Sandbox implement DeviceReceiver interface from device/api/interface.go
  1767  func (s *Sandbox) UnsetSandboxBlockIndex(index int) error {
  1768  	return s.unsetSandboxBlockIndex(index)
  1769  }
  1770  
  1771  // AppendDevice can only handle vhost user device currently, it adds a
  1772  // vhost user device to sandbox
  1773  // Sandbox implement DeviceReceiver interface from device/api/interface.go
  1774  func (s *Sandbox) AppendDevice(device api.Device) error {
  1775  	switch device.DeviceType() {
  1776  	case config.VhostUserSCSI, config.VhostUserNet, config.VhostUserBlk, config.VhostUserFS:
  1777  		return s.hypervisor.addDevice(device.GetDeviceInfo().(*config.VhostUserDeviceAttrs), vhostuserDev)
  1778  	}
  1779  	return fmt.Errorf("unsupported device type")
  1780  }
  1781  
  1782  // AddDevice will add a device to sandbox
  1783  func (s *Sandbox) AddDevice(info config.DeviceInfo) (api.Device, error) {
  1784  	if s.devManager == nil {
  1785  		return nil, fmt.Errorf("device manager isn't initialized")
  1786  	}
  1787  
  1788  	var err error
  1789  	b, err := s.devManager.NewDevice(info)
  1790  	if err != nil {
  1791  		return nil, err
  1792  	}
  1793  	defer func() {
  1794  		if err != nil {
  1795  			s.devManager.RemoveDevice(b.DeviceID())
  1796  		}
  1797  	}()
  1798  
  1799  	if err = s.devManager.AttachDevice(b.DeviceID(), s); err != nil {
  1800  		return nil, err
  1801  	}
  1802  	defer func() {
  1803  		if err != nil {
  1804  			s.devManager.DetachDevice(b.DeviceID(), s)
  1805  		}
  1806  	}()
  1807  
  1808  	return b, nil
  1809  }
  1810  
  1811  // updateResources will calculate the resources required for the virtual machine, and
  1812  // adjust the virtual machine sizing accordingly. For a given sandbox, it will calculate the
  1813  // number of vCPUs required based on the sum of container requests, plus default CPUs for the VM.
  1814  // Similar is done for memory. If changes in memory or CPU are made, the VM will be updated and
  1815  // the agent will online the applicable CPU and memory.
  1816  func (s *Sandbox) updateResources() error {
  1817  	if s == nil {
  1818  		return errors.New("sandbox is nil")
  1819  	}
  1820  
  1821  	if s.config == nil {
  1822  		return fmt.Errorf("sandbox config is nil")
  1823  	}
  1824  
  1825  	sandboxVCPUs := s.calculateSandboxCPUs()
  1826  	// Add default vcpus for sandbox
  1827  	sandboxVCPUs += s.hypervisor.hypervisorConfig().NumVCPUs
  1828  
  1829  	sandboxMemoryByte := s.calculateSandboxMemory()
  1830  	// Add default / rsvd memory for sandbox.
  1831  	sandboxMemoryByte += int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift
  1832  
  1833  	// Update VCPUs
  1834  	s.Logger().WithField("cpus-sandbox", sandboxVCPUs).Debugf("Request to hypervisor to update vCPUs")
  1835  	oldCPUs, newCPUs, err := s.hypervisor.resizeVCPUs(sandboxVCPUs)
  1836  	if err != nil {
  1837  		return err
  1838  	}
  1839  
  1840  	// If the CPUs were increased, ask agent to online them
  1841  	if oldCPUs < newCPUs {
  1842  		vcpusAdded := newCPUs - oldCPUs
  1843  		if err := s.agent.onlineCPUMem(vcpusAdded, true); err != nil {
  1844  			return err
  1845  		}
  1846  	}
  1847  	s.Logger().Debugf("Sandbox CPUs: %d", newCPUs)
  1848  
  1849  	// Update Memory
  1850  	s.Logger().WithField("memory-sandbox-size-byte", sandboxMemoryByte).Debugf("Request to hypervisor to update memory")
  1851  	newMemory, updatedMemoryDevice, err := s.hypervisor.resizeMemory(uint32(sandboxMemoryByte>>utils.MibToBytesShift), s.state.GuestMemoryBlockSizeMB, s.state.GuestMemoryHotplugProbe)
  1852  	if err != nil {
  1853  		return err
  1854  	}
  1855  	s.Logger().Debugf("Sandbox memory size: %d MB", newMemory)
  1856  	if s.state.GuestMemoryHotplugProbe && updatedMemoryDevice.addr != 0 {
  1857  		// notify the guest kernel about memory hot-add event, before onlining them
  1858  		s.Logger().Debugf("notify guest kernel memory hot-add event via probe interface, memory device located at 0x%x", updatedMemoryDevice.addr)
  1859  		if err := s.agent.memHotplugByProbe(updatedMemoryDevice.addr, uint32(updatedMemoryDevice.sizeMB), s.state.GuestMemoryBlockSizeMB); err != nil {
  1860  			return err
  1861  		}
  1862  	}
  1863  	if err := s.agent.onlineCPUMem(0, false); err != nil {
  1864  		return err
  1865  	}
  1866  	return nil
  1867  }
  1868  
  1869  func (s *Sandbox) calculateSandboxMemory() int64 {
  1870  	memorySandbox := int64(0)
  1871  	for _, c := range s.config.Containers {
  1872  		// Do not hot add again non-running containers resources
  1873  		if cont, ok := s.containers[c.ID]; ok && cont.state.State == types.StateStopped {
  1874  			s.Logger().WithField("container-id", c.ID).Debug("Do not taking into account memory resources of not running containers")
  1875  			continue
  1876  		}
  1877  
  1878  		if m := c.Resources.Memory; m != nil && m.Limit != nil {
  1879  			memorySandbox += *m.Limit
  1880  		}
  1881  	}
  1882  	return memorySandbox
  1883  }
  1884  
  1885  func (s *Sandbox) calculateSandboxCPUs() uint32 {
  1886  	mCPU := uint32(0)
  1887  
  1888  	for _, c := range s.config.Containers {
  1889  		// Do not hot add again non-running containers resources
  1890  		if cont, ok := s.containers[c.ID]; ok && cont.state.State == types.StateStopped {
  1891  			s.Logger().WithField("container-id", c.ID).Debug("Do not taking into account CPU resources of not running containers")
  1892  			continue
  1893  		}
  1894  
  1895  		if cpu := c.Resources.CPU; cpu != nil {
  1896  			if cpu.Period != nil && cpu.Quota != nil {
  1897  				mCPU += utils.CalculateMilliCPUs(*cpu.Quota, *cpu.Period)
  1898  			}
  1899  
  1900  		}
  1901  	}
  1902  	return utils.CalculateVCpusFromMilliCpus(mCPU)
  1903  }
  1904  
  1905  // GetHypervisorType is used for getting Hypervisor name currently used.
  1906  // Sandbox implement DeviceReceiver interface from device/api/interface.go
  1907  func (s *Sandbox) GetHypervisorType() string {
  1908  	return string(s.config.HypervisorType)
  1909  }
  1910  
  1911  // cgroupsUpdate will:
  1912  //  1) get the v1constraints cgroup associated with the stored cgroup path
  1913  //  2) (re-)add hypervisor vCPU threads to the appropriate cgroup
  1914  //  3) If we are managing sandbox cgroup, update the v1constraints cgroup size
  1915  func (s *Sandbox) cgroupsUpdate() error {
  1916  
  1917  	// If Kata is configured for SandboxCgroupOnly, the VMM and its processes are already
  1918  	// in the Kata sandbox cgroup (inherited). No need to move threads/processes, and we should
  1919  	// rely on parent's cgroup CPU/memory values
  1920  	if s.config.SandboxCgroupOnly {
  1921  		return nil
  1922  	}
  1923  
  1924  	if s.state.CgroupPath == "" {
  1925  		s.Logger().Warn("sandbox's cgroup won't be updated: cgroup path is empty")
  1926  		return nil
  1927  	}
  1928  
  1929  	cgroup, err := cgroupsLoadFunc(V1Constraints, cgroups.StaticPath(s.state.CgroupPath))
  1930  	if err != nil {
  1931  		return fmt.Errorf("Could not load cgroup %v: %v", s.state.CgroupPath, err)
  1932  	}
  1933  
  1934  	if err := s.constrainHypervisor(cgroup); err != nil {
  1935  		return err
  1936  	}
  1937  
  1938  	if len(s.containers) <= 1 {
  1939  		// nothing to update
  1940  		return nil
  1941  	}
  1942  
  1943  	resources, err := s.resources()
  1944  	if err != nil {
  1945  		return err
  1946  	}
  1947  
  1948  	if err := cgroup.Update(&resources); err != nil {
  1949  		return fmt.Errorf("Could not update sandbox cgroup path='%v' error='%v'", s.state.CgroupPath, err)
  1950  	}
  1951  
  1952  	return nil
  1953  }
  1954  
  1955  // cgroupsDelete will move the running processes in the sandbox cgroup
  1956  // to the parent and then delete the sandbox cgroup
  1957  func (s *Sandbox) cgroupsDelete() error {
  1958  	s.Logger().Debug("Deleting sandbox cgroup")
  1959  	if s.state.CgroupPath == "" {
  1960  		s.Logger().Warnf("sandbox cgroups path is empty")
  1961  		return nil
  1962  	}
  1963  
  1964  	var path string
  1965  	var cgroupSubsystems cgroups.Hierarchy
  1966  
  1967  	if s.config.SandboxCgroupOnly {
  1968  		return s.cgroupMgr.Destroy()
  1969  	}
  1970  
  1971  	cgroupSubsystems = V1NoConstraints
  1972  	path = cgroupNoConstraintsPath(s.state.CgroupPath)
  1973  	s.Logger().WithField("path", path).Debug("Deleting no constraints cgroup")
  1974  
  1975  	sandboxCgroups, err := cgroupsLoadFunc(cgroupSubsystems, cgroups.StaticPath(path))
  1976  	if err == cgroups.ErrCgroupDeleted {
  1977  		// cgroup already deleted
  1978  		s.Logger().Warnf("cgroup already deleted: '%s'", err)
  1979  		return nil
  1980  	}
  1981  
  1982  	if err != nil {
  1983  		return fmt.Errorf("Could not load cgroups %v: %v", path, err)
  1984  	}
  1985  
  1986  	// move running process here, that way cgroup can be removed
  1987  	parent, err := parentCgroup(cgroupSubsystems, path)
  1988  	if err != nil {
  1989  		// parent cgroup doesn't exist, that means there are no process running
  1990  		// and the no constraints cgroup was removed.
  1991  		s.Logger().WithError(err).Warn("Parent cgroup doesn't exist")
  1992  		return nil
  1993  	}
  1994  
  1995  	if err := sandboxCgroups.MoveTo(parent); err != nil {
  1996  		// Don't fail, cgroup can be deleted
  1997  		s.Logger().WithError(err).Warnf("Could not move process from %s to parent cgroup", path)
  1998  	}
  1999  
  2000  	return sandboxCgroups.Delete()
  2001  }
  2002  
  2003  // constrainHypervisor will place the VMM and vCPU threads into cgroups.
  2004  func (s *Sandbox) constrainHypervisor(cgroup cgroups.Cgroup) error {
  2005  	// VMM threads are only placed into the constrained cgroup if SandboxCgroupOnly is being set.
  2006  	// This is the "correct" behavior, but if the parent cgroup isn't set up correctly to take
  2007  	// Kata/VMM into account, Kata may fail to boot due to being overconstrained.
  2008  	// If !SandboxCgroupOnly, place the VMM into an unconstrained cgroup, and the vCPU threads into constrained
  2009  	// cgroup
  2010  	if s.config.SandboxCgroupOnly {
  2011  		// Kata components were moved into the sandbox-cgroup already, so VMM
  2012  		// will already land there as well. No need to take action
  2013  		return nil
  2014  	}
  2015  
  2016  	pids := s.hypervisor.getPids()
  2017  	if len(pids) == 0 || pids[0] == 0 {
  2018  		return fmt.Errorf("Invalid hypervisor PID: %+v", pids)
  2019  	}
  2020  
  2021  	// VMM threads are only placed into the constrained cgroup if SandboxCgroupOnly is being set.
  2022  	// This is the "correct" behavior, but if the parent cgroup isn't set up correctly to take
  2023  	// Kata/VMM into account, Kata may fail to boot due to being overconstrained.
  2024  	// If !SandboxCgroupOnly, place the VMM into an unconstrained cgroup, and the vCPU threads into constrained
  2025  	// cgroup
  2026  	// Move the VMM into cgroups without constraints, those cgroups are not yet supported.
  2027  	resources := &specs.LinuxResources{}
  2028  	path := cgroupNoConstraintsPath(s.state.CgroupPath)
  2029  	vmmCgroup, err := cgroupsNewFunc(V1NoConstraints, cgroups.StaticPath(path), resources)
  2030  	if err != nil {
  2031  		return fmt.Errorf("Could not create cgroup %v: %v", path, err)
  2032  	}
  2033  
  2034  	for _, pid := range pids {
  2035  		if pid <= 0 {
  2036  			s.Logger().Warnf("Invalid hypervisor pid: %d", pid)
  2037  			continue
  2038  		}
  2039  
  2040  		if err := vmmCgroup.Add(cgroups.Process{Pid: pid}); err != nil {
  2041  			return fmt.Errorf("Could not add hypervisor PID %d to cgroup: %v", pid, err)
  2042  		}
  2043  	}
  2044  
  2045  	// when new container joins, new CPU could be hotplugged, so we
  2046  	// have to query fresh vcpu info from hypervisor every time.
  2047  	tids, err := s.hypervisor.getThreadIDs()
  2048  	if err != nil {
  2049  		return fmt.Errorf("failed to get thread ids from hypervisor: %v", err)
  2050  	}
  2051  	if len(tids.vcpus) == 0 {
  2052  		// If there's no tid returned from the hypervisor, this is not
  2053  		// a bug. It simply means there is nothing to constrain, hence
  2054  		// let's return without any error from here.
  2055  		return nil
  2056  	}
  2057  
  2058  	// Move vcpus (threads) into cgroups with constraints.
  2059  	// Move whole hypervisor process would be easier but the IO/network performance
  2060  	// would be over-constrained.
  2061  	for _, i := range tids.vcpus {
  2062  		// In contrast, AddTask will write thread id to `tasks`
  2063  		// After this, vcpu threads are in "vcpu" sub-cgroup, other threads in
  2064  		// qemu will be left in parent cgroup untouched.
  2065  		if err := cgroup.AddTask(cgroups.Process{
  2066  			Pid: i,
  2067  		}); err != nil {
  2068  			return err
  2069  		}
  2070  	}
  2071  
  2072  	return nil
  2073  }
  2074  
  2075  func (s *Sandbox) resources() (specs.LinuxResources, error) {
  2076  	resources := specs.LinuxResources{
  2077  		CPU: s.cpuResources(),
  2078  	}
  2079  
  2080  	return resources, nil
  2081  }
  2082  
  2083  func (s *Sandbox) cpuResources() *specs.LinuxCPU {
  2084  	// Use default period and quota if they are not specified.
  2085  	// Container will inherit the constraints from its parent.
  2086  	quota := int64(0)
  2087  	period := uint64(0)
  2088  	shares := uint64(0)
  2089  	realtimePeriod := uint64(0)
  2090  	realtimeRuntime := int64(0)
  2091  
  2092  	cpu := &specs.LinuxCPU{
  2093  		Quota:           &quota,
  2094  		Period:          &period,
  2095  		Shares:          &shares,
  2096  		RealtimePeriod:  &realtimePeriod,
  2097  		RealtimeRuntime: &realtimeRuntime,
  2098  	}
  2099  
  2100  	for _, c := range s.containers {
  2101  		ann := c.GetAnnotations()
  2102  		if ann[annotations.ContainerTypeKey] == string(PodSandbox) {
  2103  			// skip sandbox container
  2104  			continue
  2105  		}
  2106  
  2107  		if c.config.Resources.CPU == nil {
  2108  			continue
  2109  		}
  2110  
  2111  		if c.config.Resources.CPU.Shares != nil {
  2112  			shares = uint64(math.Max(float64(*c.config.Resources.CPU.Shares), float64(shares)))
  2113  		}
  2114  
  2115  		if c.config.Resources.CPU.Quota != nil {
  2116  			quota += *c.config.Resources.CPU.Quota
  2117  		}
  2118  
  2119  		if c.config.Resources.CPU.Period != nil {
  2120  			period = uint64(math.Max(float64(*c.config.Resources.CPU.Period), float64(period)))
  2121  		}
  2122  
  2123  		if c.config.Resources.CPU.Cpus != "" {
  2124  			cpu.Cpus += c.config.Resources.CPU.Cpus + ","
  2125  		}
  2126  
  2127  		if c.config.Resources.CPU.RealtimeRuntime != nil {
  2128  			realtimeRuntime += *c.config.Resources.CPU.RealtimeRuntime
  2129  		}
  2130  
  2131  		if c.config.Resources.CPU.RealtimePeriod != nil {
  2132  			realtimePeriod += *c.config.Resources.CPU.RealtimePeriod
  2133  		}
  2134  
  2135  		if c.config.Resources.CPU.Mems != "" {
  2136  			cpu.Mems += c.config.Resources.CPU.Mems + ","
  2137  		}
  2138  	}
  2139  
  2140  	cpu.Cpus = strings.Trim(cpu.Cpus, " \n\t,")
  2141  
  2142  	return validCPUResources(cpu)
  2143  }
  2144  
  2145  // setupSandboxCgroup creates and joins sandbox cgroups for the sandbox config
  2146  func (s *Sandbox) setupSandboxCgroup() error {
  2147  	var err error
  2148  	spec := s.GetPatchedOCISpec()
  2149  	if spec == nil {
  2150  		return errorMissingOCISpec
  2151  	}
  2152  
  2153  	if spec.Linux == nil {
  2154  		s.Logger().WithField("sandboxid", s.id).Warning("no cgroup path provided for pod sandbox, not creating sandbox cgroup")
  2155  		return nil
  2156  	}
  2157  
  2158  	s.state.CgroupPath, err = vccgroups.ValidCgroupPath(spec.Linux.CgroupsPath, s.config.SystemdCgroup)
  2159  	if err != nil {
  2160  		return fmt.Errorf("Invalid cgroup path: %v", err)
  2161  	}
  2162  
  2163  	runtimePid := os.Getpid()
  2164  	// Add the runtime to the Kata sandbox cgroup
  2165  	if err = s.cgroupMgr.Add(runtimePid); err != nil {
  2166  		return fmt.Errorf("Could not add runtime PID %d to sandbox cgroup:  %v", runtimePid, err)
  2167  	}
  2168  
  2169  	// `Apply` updates manager's Cgroups and CgroupPaths,
  2170  	// they both need to be saved since are used to create
  2171  	// or restore a cgroup managers.
  2172  	if s.config.Cgroups, err = s.cgroupMgr.GetCgroups(); err != nil {
  2173  		return fmt.Errorf("Could not get cgroup configuration:  %v", err)
  2174  	}
  2175  
  2176  	s.state.CgroupPaths = s.cgroupMgr.GetPaths()
  2177  
  2178  	if err = s.cgroupMgr.Apply(); err != nil {
  2179  		return fmt.Errorf("Could not constrain cgroup: %v", err)
  2180  	}
  2181  
  2182  	return nil
  2183  }
  2184  
  2185  // GetPatchedOCISpec returns sandbox's OCI specification
  2186  // This OCI specification was patched when the sandbox was created
  2187  // by containerCapabilities(), SetEphemeralStorageType() and others
  2188  // in order to support:
  2189  // * capabilities
  2190  // * Ephemeral storage
  2191  // * k8s empty dir
  2192  // If you need the original (vanilla) OCI spec,
  2193  // use compatoci.GetContainerSpec() instead.
  2194  func (s *Sandbox) GetPatchedOCISpec() *specs.Spec {
  2195  	if s.config == nil {
  2196  		return nil
  2197  	}
  2198  
  2199  	// get the container associated with the PodSandbox annotation. In Kubernetes, this
  2200  	// represents the pause container. In Docker, this is the container. We derive the
  2201  	// cgroup path from this container.
  2202  	for _, cConfig := range s.config.Containers {
  2203  		if cConfig.Annotations[annotations.ContainerTypeKey] == string(PodSandbox) {
  2204  			return cConfig.CustomSpec
  2205  		}
  2206  	}
  2207  
  2208  	return nil
  2209  }
  2210  
  2211  func (s *Sandbox) GetOOMEvent() (string, error) {
  2212  	return s.agent.getOOMEvent()
  2213  }