github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/runsc/boot/controller.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package boot
    16  
    17  import (
    18  	"errors"
    19  	"fmt"
    20  	"os"
    21  	"path"
    22  	gtime "time"
    23  
    24  	specs "github.com/opencontainers/runtime-spec/specs-go"
    25  	"golang.org/x/sys/unix"
    26  	"github.com/metacubex/gvisor/pkg/cleanup"
    27  	"github.com/metacubex/gvisor/pkg/context"
    28  	"github.com/metacubex/gvisor/pkg/control/server"
    29  	"github.com/metacubex/gvisor/pkg/fd"
    30  	"github.com/metacubex/gvisor/pkg/fspath"
    31  	"github.com/metacubex/gvisor/pkg/log"
    32  	"github.com/metacubex/gvisor/pkg/sentry/control"
    33  	"github.com/metacubex/gvisor/pkg/sentry/fsimpl/erofs"
    34  	"github.com/metacubex/gvisor/pkg/sentry/kernel"
    35  	"github.com/metacubex/gvisor/pkg/sentry/seccheck"
    36  	"github.com/metacubex/gvisor/pkg/sentry/socket/netstack"
    37  	"github.com/metacubex/gvisor/pkg/sentry/vfs"
    38  	"github.com/metacubex/gvisor/pkg/urpc"
    39  	"github.com/metacubex/gvisor/runsc/boot/procfs"
    40  	"github.com/metacubex/gvisor/runsc/config"
    41  	"github.com/metacubex/gvisor/runsc/specutils"
    42  )
    43  
    44  const (
    45  	// ContMgrCheckpoint checkpoints a container.
    46  	ContMgrCheckpoint = "containerManager.Checkpoint"
    47  
    48  	// ContMgrCreateSubcontainer creates a sub-container.
    49  	ContMgrCreateSubcontainer = "containerManager.CreateSubcontainer"
    50  
    51  	// ContMgrDestroySubcontainer is used to stop a sub-container and free all
    52  	// associated resources in the sandbox.
    53  	ContMgrDestroySubcontainer = "containerManager.DestroySubcontainer"
    54  
    55  	// ContMgrEvent gets stats about the container used by "runsc events".
    56  	ContMgrEvent = "containerManager.Event"
    57  
    58  	// ContMgrExecuteAsync executes a command in a container and returns its PID.
    59  	ContMgrExecuteAsync = "containerManager.ExecuteAsync"
    60  
    61  	// ContMgrPortForward starts port forwarding with the sandbox.
    62  	ContMgrPortForward = "containerManager.PortForward"
    63  
    64  	// ContMgrProcesses lists processes running in a container.
    65  	ContMgrProcesses = "containerManager.Processes"
    66  
    67  	// ContMgrRestore restores a container from a statefile.
    68  	ContMgrRestore = "containerManager.Restore"
    69  
    70  	// ContMgrSignal sends a signal to a container.
    71  	ContMgrSignal = "containerManager.Signal"
    72  
    73  	// ContMgrStartSubcontainer starts a sub-container inside a running sandbox.
    74  	ContMgrStartSubcontainer = "containerManager.StartSubcontainer"
    75  
    76  	// ContMgrWait waits on the init process of the container and returns its
    77  	// ExitStatus.
    78  	ContMgrWait = "containerManager.Wait"
    79  
    80  	// ContMgrWaitPID waits on a process with a certain PID in the sandbox and
    81  	// returns its ExitStatus.
    82  	ContMgrWaitPID = "containerManager.WaitPID"
    83  
    84  	// ContMgrRootContainerStart starts a new sandbox with a root container.
    85  	ContMgrRootContainerStart = "containerManager.StartRoot"
    86  
    87  	// ContMgrCreateTraceSession starts a trace session.
    88  	ContMgrCreateTraceSession = "containerManager.CreateTraceSession"
    89  
    90  	// ContMgrDeleteTraceSession deletes a trace session.
    91  	ContMgrDeleteTraceSession = "containerManager.DeleteTraceSession"
    92  
    93  	// ContMgrListTraceSessions lists trace sessions.
    94  	ContMgrListTraceSessions = "containerManager.ListTraceSessions"
    95  
    96  	// ContMgrProcfsDump dumps sandbox procfs state.
    97  	ContMgrProcfsDump = "containerManager.ProcfsDump"
    98  
    99  	// ContMgrMount mounts a filesystem in a container.
   100  	ContMgrMount = "containerManager.Mount"
   101  )
   102  
   103  const (
   104  	// NetworkCreateLinksAndRoutes creates links and routes in a network stack.
        	// NOTE(review): presumably served by the Network object, which
        	// newController registers only when the root network namespace's stack
        	// is a *netstack.Stack — confirm.
   105  	NetworkCreateLinksAndRoutes = "Network.CreateLinksAndRoutes"
   106  
   107  	// DebugStacks collects sandbox stacks for debugging.
   108  	DebugStacks = "debug.Stacks"
   109  )
   110  
   111  // Profiling related commands (see pprof.go for more details).
        // NOTE(review): presumably served by the object from control.NewProfile,
        // which newController registers only when ProfileEnable is set — confirm.
   112  const (
   113  	ProfileCPU   = "Profile.CPU"
   114  	ProfileHeap  = "Profile.Heap"
   115  	ProfileBlock = "Profile.Block"
   116  	ProfileMutex = "Profile.Mutex"
   117  	ProfileTrace = "Profile.Trace"
   118  )
   119  
   120  // Logging related commands (see logging.go for more details).
   121  const (
   122  	LoggingChange = "Logging.Change"
   123  )
   124  
   125  // Lifecycle related commands (see lifecycle.go for more details).
   126  const (
   127  	LifecyclePause  = "Lifecycle.Pause"
   128  	LifecycleResume = "Lifecycle.Resume"
   129  )
   130  
   131  // Usage related commands (see usage.go for more details).
   132  const (
   133  	UsageCollect = "Usage.Collect"
   134  	UsageUsageFD = "Usage.UsageFD"
   135  )
   136  
   137  // Metrics related commands (see metrics.go for more details).
   138  const (
   139  	MetricsGetRegistered = "Metrics.GetRegisteredMetrics"
   140  	MetricsExport        = "Metrics.Export"
   141  )
   142  
   143  // Commands for interacting with cgroupfs within the sandbox.
   144  const (
   145  	CgroupsReadControlFiles  = "Cgroups.ReadControlFiles"
   146  	CgroupsWriteControlFiles = "Cgroups.WriteControlFiles"
   147  )
   148  
   149  // controller holds the control server, and is used for communication into the
   150  // sandbox. It is built by newController.
   151  type controller struct {
   152  	// srv is the control server. newController creates it but does not start it.
   153  	srv *server.Server
   154  
   155  	// manager holds the containerManager methods; it is registered with srv.
   156  	manager *containerManager
   157  }
   158  
   159  // newController creates a new controller. The caller must call
   160  // controller.srv.StartServing() to start the controller.
   161  func newController(fd int, l *Loader) (*controller, error) {
   162  	srv, err := server.CreateFromFD(fd)
   163  	if err != nil {
   164  		return nil, err
   165  	}
   166  
   167  	ctrl := &controller{
   168  		manager: &containerManager{
   169  			startChan:       make(chan struct{}),
   170  			startResultChan: make(chan error),
   171  			l:               l,
   172  		},
   173  		srv: srv,
   174  	}
   175  	ctrl.srv.Register(ctrl.manager)
   176  	ctrl.srv.Register(&control.Cgroups{Kernel: l.k})
   177  	ctrl.srv.Register(&control.Lifecycle{Kernel: l.k})
   178  	ctrl.srv.Register(&control.Logging{})
   179  	ctrl.srv.Register(&control.Proc{Kernel: l.k})
   180  	ctrl.srv.Register(&control.State{Kernel: l.k})
   181  	ctrl.srv.Register(&control.Usage{Kernel: l.k})
   182  	ctrl.srv.Register(&control.Metrics{})
   183  	ctrl.srv.Register(&debug{})
   184  
   185  	if eps, ok := l.k.RootNetworkNamespace().Stack().(*netstack.Stack); ok {
   186  		ctrl.srv.Register(&Network{
   187  			Stack:  eps.Stack,
   188  			Kernel: l.k,
   189  		})
   190  	}
   191  	if l.root.conf.ProfileEnable {
   192  		ctrl.srv.Register(control.NewProfile(l.k))
   193  	}
   194  	return ctrl, nil
   195  }
   196  
   197  // stopRPCTimeout is the time for clients to complete ongoing RPCs.
   198  const stopRPCTimeout = 15 * gtime.Second
   199  
        // stop stops the control server, passing stopRPCTimeout to srv.Stop.
   200  func (c *controller) stop() {
   201  	c.srv.Stop(stopRPCTimeout)
   202  }
   203  
   204  // containerManager manages sandbox containers.
   205  type containerManager struct {
   206  	// startChan is used to signal when the root container process should
   207  	// be started. StartRoot and Restore send on it.
   208  	startChan chan struct{}
   209  
   210  	// startResultChan is used to signal when the root container has
   211  	// started. Any errors encountered during startup will be sent to the
   212  	// channel. A nil value indicates success.
   213  	startResultChan chan error
   214  
   215  	// l is the loader that creates containers and sandboxes.
   216  	l *Loader
   217  }
   218  
   219  // StartRoot will start the root container process.
   220  func (cm *containerManager) StartRoot(cid *string, _ *struct{}) error {
   221  	log.Debugf("containerManager.StartRoot, cid: %s", *cid)
   222  	// Tell the root container to start and wait for the result.
   223  	cm.startChan <- struct{}{}
   224  	if err := <-cm.startResultChan; err != nil {
   225  		return fmt.Errorf("starting sandbox: %v", err)
   226  	}
   227  	return nil
   228  }
   229  
   230  // Processes retrieves information about processes running in the sandbox.
   231  func (cm *containerManager) Processes(cid *string, out *[]*control.Process) error {
   232  	log.Debugf("containerManager.Processes, cid: %s", *cid)
   233  	return control.Processes(cm.l.k, *cid, out)
   234  }
   235  
   236  // CreateArgs contains arguments to the CreateSubcontainer method.
   237  type CreateArgs struct {
   238  	// CID is the ID of the container to create.
   239  	CID string
   240  
   241  	// FilePayload may contain a TTY file for the terminal, if enabled. At most
        	// one file is accepted.
   242  	urpc.FilePayload
   243  }
   244  
   245  // CreateSubcontainer creates a container within a sandbox.
   246  func (cm *containerManager) CreateSubcontainer(args *CreateArgs, _ *struct{}) error {
   247  	log.Debugf("containerManager.CreateSubcontainer: %s", args.CID)
   248  
   249  	if len(args.Files) > 1 {
   250  		return fmt.Errorf("start arguments must have at most 1 files for TTY")
   251  	}
   252  	var tty *fd.FD
   253  	if len(args.Files) == 1 {
   254  		var err error
   255  		tty, err = fd.NewFromFile(args.Files[0])
   256  		if err != nil {
   257  			return fmt.Errorf("error dup'ing TTY file: %w", err)
   258  		}
   259  	}
   260  	return cm.l.createSubcontainer(args.CID, tty)
   261  }
   262  
   263  // StartArgs contains arguments to the StartSubcontainer method.
   264  type StartArgs struct {
   265  	// Spec is the spec of the container to start.
   266  	Spec *specs.Spec
   267  
   268  	// Conf is the runsc-specific configuration for the sandbox.
   269  	Conf *config.Config
   270  
   271  	// CID is the ID of the container to start.
   272  	CID string
   273  
   274  	// NumGoferFilestoreFDs is the number of gofer filestore FDs donated.
   275  	NumGoferFilestoreFDs int
   276  
   277  	// IsDevIoFilePresent indicates whether the dev gofer FD is present.
   278  	IsDevIoFilePresent bool
   279  
   280  	// GoferMountConfs contains information about how the gofer mounts have been
   281  	// configured. The first entry is for rootfs and the following entries are
   282  	// for bind mounts in Spec.Mounts (in the same order).
   283  	GoferMountConfs []GoferMountConf
   284  
   285  	// FilePayload contains, in order:
   286  	//   * stdin, stdout, and stderr (only when Spec.Process.Terminal is false).
   287  	//   * NumGoferFilestoreFDs gofer filestore file descriptors (optional).
   288  	//   * file descriptor for /dev gofer connection (only when IsDevIoFilePresent).
   289  	//   * file descriptors to connect to gofer to serve the root filesystem (at least one).
   290  	urpc.FilePayload
   291  }
   292  
   293  // StartSubcontainer runs a created container within a sandbox.
   294  func (cm *containerManager) StartSubcontainer(args *StartArgs, _ *struct{}) error {
   295  	// Validate arguments.
   296  	if args == nil {
   297  		return errors.New("start missing arguments")
   298  	}
   299  	log.Debugf("containerManager.StartSubcontainer, cid: %s, args: %+v", args.CID, args)
   300  	if args.Spec == nil {
   301  		return errors.New("start arguments missing spec")
   302  	}
   303  	if args.Conf == nil {
   304  		return errors.New("start arguments missing config")
   305  	}
   306  	if args.CID == "" {
   307  		return errors.New("start argument missing container ID")
   308  	}
   309  	expectedFDs := 1 // At least one FD for the root filesystem.
   310  	expectedFDs += args.NumGoferFilestoreFDs
   311  	if args.IsDevIoFilePresent {
   312  		expectedFDs++
   313  	}
   314  	if !args.Spec.Process.Terminal {
   315  		expectedFDs += 3
   316  	}
   317  	if len(args.Files) < expectedFDs {
   318  		return fmt.Errorf("start arguments must contain at least %d FDs, but only got %d", expectedFDs, len(args.Files))
   319  	}
   320  
   321  	// All validation passed, logs the spec for debugging.
   322  	specutils.LogSpecDebug(args.Spec, args.Conf.OCISeccomp)
   323  
   324  	goferFiles := args.Files
   325  	var stdios []*fd.FD
   326  	if !args.Spec.Process.Terminal {
   327  		// When not using a terminal, stdios come as the first 3 files in the
   328  		// payload.
   329  		var err error
   330  		stdios, err = fd.NewFromFiles(goferFiles[:3])
   331  		if err != nil {
   332  			return fmt.Errorf("error dup'ing stdio files: %w", err)
   333  		}
   334  		goferFiles = goferFiles[3:]
   335  	}
   336  	defer func() {
   337  		for _, fd := range stdios {
   338  			_ = fd.Close()
   339  		}
   340  	}()
   341  
   342  	var goferFilestoreFDs []*fd.FD
   343  	for i := 0; i < args.NumGoferFilestoreFDs; i++ {
   344  		goferFilestoreFD, err := fd.NewFromFile(goferFiles[i])
   345  		if err != nil {
   346  			return fmt.Errorf("error dup'ing gofer filestore file: %w", err)
   347  		}
   348  		goferFilestoreFDs = append(goferFilestoreFDs, goferFilestoreFD)
   349  	}
   350  	goferFiles = goferFiles[args.NumGoferFilestoreFDs:]
   351  	defer func() {
   352  		for _, fd := range goferFilestoreFDs {
   353  			_ = fd.Close()
   354  		}
   355  	}()
   356  
   357  	var devGoferFD *fd.FD
   358  	if args.IsDevIoFilePresent {
   359  		var err error
   360  		devGoferFD, err = fd.NewFromFile(goferFiles[0])
   361  		if err != nil {
   362  			return fmt.Errorf("error dup'ing dev gofer file: %w", err)
   363  		}
   364  		goferFiles = goferFiles[1:]
   365  		defer devGoferFD.Close()
   366  	}
   367  
   368  	goferFDs, err := fd.NewFromFiles(goferFiles)
   369  	if err != nil {
   370  		return fmt.Errorf("error dup'ing gofer files: %w", err)
   371  	}
   372  	defer func() {
   373  		for _, fd := range goferFDs {
   374  			_ = fd.Close()
   375  		}
   376  	}()
   377  
   378  	if err := cm.l.startSubcontainer(args.Spec, args.Conf, args.CID, stdios, goferFDs, goferFilestoreFDs, devGoferFD, args.GoferMountConfs); err != nil {
   379  		log.Debugf("containerManager.StartSubcontainer failed, cid: %s, args: %+v, err: %v", args.CID, args, err)
   380  		return err
   381  	}
   382  	log.Debugf("Container started, cid: %s", args.CID)
   383  	return nil
   384  }
   385  
   386  // DestroySubcontainer stops a container if it is still running and cleans up
   387  // its filesystem.
   388  func (cm *containerManager) DestroySubcontainer(cid *string, _ *struct{}) error {
   389  	log.Debugf("containerManager.DestroySubcontainer, cid: %s", *cid)
   390  	return cm.l.destroySubcontainer(*cid)
   391  }
   392  
   393  // ExecuteAsync starts running a command on a created or running sandbox. It
   394  // returns the PID of the new process.
   395  func (cm *containerManager) ExecuteAsync(args *control.ExecArgs, pid *int32) error {
   396  	log.Debugf("containerManager.ExecuteAsync, cid: %s, args: %+v", args.ContainerID, args)
   397  	tgid, err := cm.l.executeAsync(args)
   398  	if err != nil {
   399  		log.Debugf("containerManager.ExecuteAsync failed, cid: %s, args: %+v, err: %v", args.ContainerID, args, err)
   400  		return err
   401  	}
   402  	*pid = int32(tgid)
   403  	return nil
   404  }
   405  
   406  // Checkpoint pauses a sandbox and saves its state.
   407  func (cm *containerManager) Checkpoint(o *control.SaveOpts, _ *struct{}) error {
   408  	log.Debugf("containerManager.Checkpoint")
   409  	// TODO(gvisor.dev/issues/6243): save/restore not supported w/ hostinet
   410  	if cm.l.root.conf.Network == config.NetworkHost {
   411  		return errors.New("checkpoint not supported when using hostinet")
   412  	}
   413  
   414  	state := control.State{
   415  		Kernel:   cm.l.k,
   416  		Watchdog: cm.l.watchdog,
   417  	}
   418  	return state.Save(o, nil)
   419  }
   420  
   421  // PortForwardOpts contains options for port forwarding to a port in a
   422  // container.
   423  type PortForwardOpts struct {
   424  	// FilePayload contains one fd for a UDS (or local port) used for port
   425  	// forwarding.
   426  	urpc.FilePayload
   427  
   428  	// ContainerID is the container to forward the port to.
   429  	ContainerID string
   430  	// Port is the port to forward.
   431  	Port uint16
   432  }
   433  
   434  // PortForward initiates a port forward to the container.
   435  func (cm *containerManager) PortForward(opts *PortForwardOpts, _ *struct{}) error {
   436  	log.Debugf("containerManager.PortForward, cid: %s, port: %d", opts.ContainerID, opts.Port)
   437  	if err := cm.l.portForward(opts); err != nil {
   438  		log.Debugf("containerManager.PortForward failed, opts: %+v, err: %v", opts, err)
   439  		return err
   440  	}
   441  	return nil
   442  }
   443  
   444  // RestoreOpts contains options related to restoring a container's file system.
   445  type RestoreOpts struct {
   446  	// FilePayload contains the state file to be restored, followed by the
   447  	// platform device file if necessary. Restore requires one or two files and
        	// rejects an empty state file.
   448  	urpc.FilePayload
   449  
   450  	// SandboxID contains the ID of the sandbox.
   451  	SandboxID string
   452  }
   453  
   454  // Restore loads a container from a statefile.
   455  // The container's current kernel is destroyed, a restore environment is
   456  // created, and the kernel is recreated with the restore state file. The
   457  // container then sends the signal to start.
   458  func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
   459  	log.Debugf("containerManager.Restore")
   460  
   461  	r := restorer{container: &cm.l.root}
   462  	switch numFiles := len(o.Files); numFiles {
   463  	case 2:
   464  		// The device file is donated to the platform.
   465  		// Can't take ownership away from os.File. dup them to get a new FD.
   466  		fd, err := unix.Dup(int(o.Files[1].Fd()))
   467  		if err != nil {
   468  			return fmt.Errorf("failed to dup file: %v", err)
   469  		}
   470  		r.deviceFile = os.NewFile(uintptr(fd), "platform device")
   471  		fallthrough
   472  	case 1:
   473  		r.stateFile = o.Files[0]
   474  		if info, err := r.stateFile.Stat(); err != nil {
   475  			return err
   476  		} else if info.Size() == 0 {
   477  			return fmt.Errorf("file cannot be empty")
   478  		}
   479  
   480  	case 0:
   481  		return fmt.Errorf("at least one file must be passed to Restore")
   482  	default:
   483  		return fmt.Errorf("at most two files may be passed to Restore")
   484  	}
   485  
   486  	// Pause the kernel while we build a new one.
   487  	cm.l.k.Pause()
   488  
   489  	if err := r.restore(cm.l); err != nil {
   490  		return err
   491  	}
   492  
   493  	// Tell the root container to start and wait for the result.
   494  	cm.startChan <- struct{}{}
   495  	if err := <-cm.startResultChan; err != nil {
   496  		return fmt.Errorf("starting sandbox: %v", err)
   497  	}
   498  
   499  	return nil
   500  }
   501  
   502  // Wait waits for the init process in the given container.
   503  func (cm *containerManager) Wait(cid *string, waitStatus *uint32) error {
   504  	log.Debugf("containerManager.Wait, cid: %s", *cid)
   505  	err := cm.l.waitContainer(*cid, waitStatus)
   506  	log.Debugf("containerManager.Wait returned, cid: %s, waitStatus: %#x, err: %v", *cid, *waitStatus, err)
   507  	return err
   508  }
   509  
   510  // WaitPIDArgs are arguments to the WaitPID method.
   511  type WaitPIDArgs struct {
   512  	// PID is the PID in the container's PID namespace. WaitPID converts it to a
        	// kernel.ThreadID.
   513  	PID int32
   514  
   515  	// CID is the container ID.
   516  	CID string
   517  }
   518  
   519  // WaitPID waits for the process with PID 'pid' in the sandbox.
   520  func (cm *containerManager) WaitPID(args *WaitPIDArgs, waitStatus *uint32) error {
   521  	log.Debugf("containerManager.Wait, cid: %s, pid: %d", args.CID, args.PID)
   522  	err := cm.l.waitPID(kernel.ThreadID(args.PID), args.CID, waitStatus)
   523  	log.Debugf("containerManager.Wait, cid: %s, pid: %d, waitStatus: %#x, err: %v", args.CID, args.PID, *waitStatus, err)
   524  	return err
   525  }
   526  
   527  // SignalDeliveryMode enumerates different signal delivery modes.
   528  type SignalDeliveryMode int
   529  
   530  const (
   531  	// DeliverToProcess delivers the signal to the container process with
   532  	// the specified PID. If PID is 0, then the container init process is
   533  	// signaled.
   534  	DeliverToProcess SignalDeliveryMode = iota
   535  
   536  	// DeliverToAllProcesses delivers the signal to all processes in the
   537  	// container. PID must be 0.
   538  	DeliverToAllProcesses
   539  
   540  	// DeliverToForegroundProcessGroup delivers the signal to the
   541  	// foreground process group in the same TTY session as the specified
   542  	// process. If PID is 0, then the signal is delivered to the foreground
   543  	// process group for the TTY for the init process.
   544  	DeliverToForegroundProcessGroup
   545  )
   546  
   547  func (s SignalDeliveryMode) String() string {
   548  	switch s {
   549  	case DeliverToProcess:
   550  		return "Process"
   551  	case DeliverToAllProcesses:
   552  		return "All"
   553  	case DeliverToForegroundProcessGroup:
   554  		return "Foreground Process Group"
   555  	}
   556  	return fmt.Sprintf("unknown signal delivery mode: %d", s)
   557  }
   558  
   559  // SignalArgs are arguments to the Signal method.
   560  type SignalArgs struct {
   561  	// CID is the container ID.
   562  	CID string
   563  
   564  	// Signo is the signal to send to the process.
   565  	Signo int32
   566  
   567  	// PID is the process ID in the given container that will be signaled,
   568  	// relative to the root PID namespace, not the container's.
   569  	// If 0, the container init process is used.
   570  	PID int32
   571  
   572  	// Mode is the signal delivery mode.
   573  	Mode SignalDeliveryMode
   574  }
   575  
   576  // Signal sends a signal to one or more processes in a container. If args.PID
   577  // is 0, then the container init process is used. Depending on the
   578  // args.SignalDeliveryMode option, the signal may be sent directly to the
   579  // indicated process, to all processes in the container, or to the foreground
   580  // process group.
   581  func (cm *containerManager) Signal(args *SignalArgs, _ *struct{}) error {
   582  	log.Debugf("containerManager.Signal: cid: %s, PID: %d, signal: %d, mode: %v", args.CID, args.PID, args.Signo, args.Mode)
   583  	return cm.l.signal(args.CID, args.PID, args.Signo, args.Mode)
   584  }
   585  
   586  // CreateTraceSessionArgs are arguments to the CreateTraceSession method.
   587  type CreateTraceSessionArgs struct {
        	// Config is the session configuration handed to seccheck.Create.
   588  	Config seccheck.SessionConfig
        	// Force is forwarded to seccheck.Create.
        	// NOTE(review): presumably allows replacing an existing session — confirm.
   589  	Force  bool
        	// FilePayload carries the sink files: a non-nil Files[i] is dup'ed and
        	// stored in Config.Sinks[i].FD.
   590  	urpc.FilePayload
   591  }
   592  
   593  // CreateTraceSession creates a new trace session.
   594  func (cm *containerManager) CreateTraceSession(args *CreateTraceSessionArgs, _ *struct{}) error {
   595  	log.Debugf("containerManager.CreateTraceSession: config: %+v", args.Config)
   596  	for i, sinkFile := range args.Files {
   597  		if sinkFile != nil {
   598  			fd, err := fd.NewFromFile(sinkFile)
   599  			if err != nil {
   600  				return err
   601  			}
   602  			args.Config.Sinks[i].FD = fd
   603  		}
   604  	}
   605  	return seccheck.Create(&args.Config, args.Force)
   606  }
   607  
   608  // DeleteTraceSession deletes an existing trace session.
   609  func (cm *containerManager) DeleteTraceSession(name *string, _ *struct{}) error {
   610  	log.Debugf("containerManager.DeleteTraceSession: name: %q", *name)
   611  	return seccheck.Delete(*name)
   612  }
   613  
   614  // ListTraceSessions lists trace sessions.
   615  func (cm *containerManager) ListTraceSessions(_ *struct{}, out *[]seccheck.SessionConfig) error {
   616  	log.Debugf("containerManager.ListTraceSessions")
   617  	seccheck.List(out)
   618  	return nil
   619  }
   620  
   621  // ProcfsDump dumps procfs state of the sandbox.
   622  func (cm *containerManager) ProcfsDump(_ *struct{}, out *[]procfs.ProcessProcfsDump) error {
   623  	log.Debugf("containerManager.ProcfsDump")
   624  	ts := cm.l.k.TaskSet()
   625  	pidns := ts.Root
   626  	*out = make([]procfs.ProcessProcfsDump, 0, len(cm.l.processes))
   627  	for _, tg := range pidns.ThreadGroups() {
   628  		pid := pidns.IDOfThreadGroup(tg)
   629  		procDump, err := procfs.Dump(tg.Leader(), pid, pidns)
   630  		if err != nil {
   631  			log.Warningf("skipping procfs dump for PID %s: %v", pid, err)
   632  			continue
   633  		}
   634  		*out = append(*out, procDump)
   635  	}
   636  	return nil
   637  }
   638  
   639  // MountArgs contains arguments to the Mount method.
   640  type MountArgs struct {
   641  	// ContainerID is the container in which we will mount the filesystem.
   642  	ContainerID string
   643  
   644  	// Source is the mount source.
   645  	Source string
   646  
   647  	// Destination is the mount target. It must be an absolute path.
   648  	Destination string
   649  
   650  	// FsType is the filesystem type. Mount only accepts erofs.Name.
   651  	FsType string
   652  
   653  	// FilePayload contains the source image FD, if required by the filesystem.
        	// For erofs exactly one file is required.
   654  	urpc.FilePayload
   655  }
   656  
   657  const initTID kernel.ThreadID = 1
   658  
   659  // Mount mounts a filesystem in a container.
   660  func (cm *containerManager) Mount(args *MountArgs, _ *struct{}) error {
   661  	log.Debugf("containerManager.Mount, cid: %s, args: %+v", args.ContainerID, args)
   662  
   663  	var cu cleanup.Cleanup
   664  	defer cu.Clean()
   665  
   666  	eid := execID{cid: args.ContainerID}
   667  	ep, ok := cm.l.processes[eid]
   668  	if !ok {
   669  		return fmt.Errorf("container %v is deleted", args.ContainerID)
   670  	}
   671  	if ep.tg == nil {
   672  		return fmt.Errorf("container %v isn't started", args.ContainerID)
   673  	}
   674  
   675  	t := ep.tg.PIDNamespace().TaskWithID(initTID)
   676  	if t == nil {
   677  		return fmt.Errorf("failed to find init process")
   678  	}
   679  
   680  	source := args.Source
   681  	dest := path.Clean(args.Destination)
   682  	fstype := args.FsType
   683  
   684  	if dest[0] != '/' {
   685  		return fmt.Errorf("absolute path must be provided for destination")
   686  	}
   687  
   688  	var opts vfs.MountOptions
   689  	switch fstype {
   690  	case erofs.Name:
   691  		if len(args.FilePayload.Files) != 1 {
   692  			return fmt.Errorf("exactly one image file must be provided")
   693  		}
   694  
   695  		imageFD, err := unix.Dup(int(args.FilePayload.Files[0].Fd()))
   696  		if err != nil {
   697  			return fmt.Errorf("failed to dup image FD: %v", err)
   698  		}
   699  		cu.Add(func() { unix.Close(imageFD) })
   700  
   701  		opts = vfs.MountOptions{
   702  			ReadOnly: true,
   703  			GetFilesystemOptions: vfs.GetFilesystemOptions{
   704  				InternalMount: true,
   705  				Data:          fmt.Sprintf("ifd=%d", imageFD),
   706  			},
   707  		}
   708  
   709  	default:
   710  		return fmt.Errorf("unsupported filesystem type: %v", fstype)
   711  	}
   712  
   713  	ctx := context.Background()
   714  	root := t.FSContext().RootDirectory()
   715  	defer root.DecRef(ctx)
   716  
   717  	pop := vfs.PathOperation{
   718  		Root:  root,
   719  		Start: root,
   720  		Path:  fspath.Parse(dest),
   721  	}
   722  
   723  	if _, err := t.Kernel().VFS().MountAt(ctx, t.Credentials(), source, &pop, fstype, &opts); err != nil {
   724  		return err
   725  	}
   726  	log.Infof("Mounted %q to %q type: %s, internal-options: %q, in container %q", source, dest, fstype, opts.GetFilesystemOptions.Data, args.ContainerID)
   727  	cu.Release()
   728  	return nil
   729  }