github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/container_linux.go (about)

     1  package libcontainer
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"os/exec"
    10  	"path"
    11  	"path/filepath"
    12  	"reflect"
    13  	"strconv"
    14  	"strings"
    15  	"sync"
    16  	"time"
    17  
    18  	"github.com/opencontainers/runtime-spec/specs-go"
    19  	"github.com/sirupsen/logrus"
    20  	"github.com/vishvananda/netlink/nl"
    21  	"golang.org/x/sys/execabs"
    22  	"golang.org/x/sys/unix"
    23  
    24  	"github.com/opencontainers/runc/libcontainer/cgroups"
    25  	"github.com/opencontainers/runc/libcontainer/configs"
    26  	"github.com/opencontainers/runc/libcontainer/dmz"
    27  	"github.com/opencontainers/runc/libcontainer/intelrdt"
    28  	"github.com/opencontainers/runc/libcontainer/system"
    29  	"github.com/opencontainers/runc/libcontainer/system/kernelversion"
    30  	"github.com/opencontainers/runc/libcontainer/utils"
    31  )
    32  
// stdioFdCount is the number of standard I/O descriptors (stdin, stdout and
// stderr). It is added to an index into exec.Cmd.ExtraFiles to compute the
// descriptor number advertised to the child through the _LIBCONTAINER_*
// environment variables.
const stdioFdCount = 3
    34  
// Container is a libcontainer container object.
type Container struct {
	// id is the container's unique ID, as returned by ID.
	id                   string
	// stateDir is the directory holding per-container runtime files such as
	// the exec FIFO.
	stateDir             string
	// config is the container configuration; it is replaced by Set.
	config               *configs.Config
	// cgroupManager manages the container's cgroups.
	cgroupManager        cgroups.Manager
	// intelRdtManager may be nil; users check for nil before calling it.
	intelRdtManager      *intelrdt.Manager
	// initProcess is the container's init process; newInitProcess sets it.
	initProcess          parentProcess
	// NOTE(review): presumably the start time of the init process; it is not
	// referenced in this part of the file -- confirm against its users.
	initProcessStartTime uint64
	// m serializes the exported methods that take it (Status, State, Set,
	// Start, Exec, Signal, Destroy, Pause, Resume, ...).
	m                    sync.Mutex
	// NOTE(review): presumably the detected CRIU version; not referenced in
	// this part of the file.
	criuVersion          int
	// state is the current state-machine state, used by Destroy, Pause and
	// Resume.
	state                containerState
	// NOTE(review): presumably the container creation time; not referenced in
	// this part of the file.
	created              time.Time
	// fifo is the O_PATH handle on the exec FIFO. It is opened by
	// includeExecFifo and closed by start once init has been spawned.
	fifo                 *os.File
}
    50  
// State represents a running container's state. It embeds BaseState and adds
// the Linux-specific fields below.
type State struct {
	BaseState

	// Platform specific fields below here

	// Rootless is set if the container was started in rootless mode, i.e.
	// it is true if BaseState.Config.RootlessEUID && BaseState.Config.RootlessCgroups.
	Rootless bool `json:"rootless"`

	// CgroupPaths holds the paths to all the container's cgroups, as returned
	// by (*cgroups.Manager).GetPaths.
	//
	// For cgroup v1, a key is cgroup subsystem name, and the value is the path
	// to the cgroup for this subsystem.
	//
	// For cgroup v2 unified hierarchy, a key is "", and the value is the unified path.
	CgroupPaths map[string]string `json:"cgroup_paths"`

	// NamespacePaths are filepaths to the container's namespaces. Key is the namespace type
	// with the value as the path.
	NamespacePaths map[configs.NamespaceType]string `json:"namespace_paths"`

	// ExternalDescriptors are the container's standard descriptors
	// (std{in,out,err}), needed for checkpoint and restore.
	ExternalDescriptors []string `json:"external_descriptors,omitempty"`

	// IntelRdtPath is the Intel RDT "resource control" filesystem path.
	IntelRdtPath string `json:"intel_rdt_path"`
}
    79  
    80  // ID returns the container's unique ID
    81  func (c *Container) ID() string {
    82  	return c.id
    83  }
    84  
    85  // Config returns the container's configuration
    86  func (c *Container) Config() configs.Config {
    87  	return *c.config
    88  }
    89  
    90  // Status returns the current status of the container.
    91  func (c *Container) Status() (Status, error) {
    92  	c.m.Lock()
    93  	defer c.m.Unlock()
    94  	return c.currentStatus()
    95  }
    96  
    97  // State returns the current container's state information.
    98  func (c *Container) State() (*State, error) {
    99  	c.m.Lock()
   100  	defer c.m.Unlock()
   101  	return c.currentState()
   102  }
   103  
   104  // OCIState returns the current container's state information.
   105  func (c *Container) OCIState() (*specs.State, error) {
   106  	c.m.Lock()
   107  	defer c.m.Unlock()
   108  	return c.currentOCIState()
   109  }
   110  
   111  // ignoreCgroupError filters out cgroup-related errors that can be ignored,
   112  // because the container is stopped and its cgroup is gone.
   113  func (c *Container) ignoreCgroupError(err error) error {
   114  	if err == nil {
   115  		return nil
   116  	}
   117  	if errors.Is(err, os.ErrNotExist) && !c.hasInit() && !c.cgroupManager.Exists() {
   118  		return nil
   119  	}
   120  	return err
   121  }
   122  
   123  // Processes returns the PIDs inside this container. The PIDs are in the
   124  // namespace of the calling process.
   125  //
   126  // Some of the returned PIDs may no longer refer to processes in the container,
   127  // unless the container state is PAUSED in which case every PID in the slice is
   128  // valid.
   129  func (c *Container) Processes() ([]int, error) {
   130  	pids, err := c.cgroupManager.GetAllPids()
   131  	if err = c.ignoreCgroupError(err); err != nil {
   132  		return nil, fmt.Errorf("unable to get all container pids: %w", err)
   133  	}
   134  	return pids, nil
   135  }
   136  
   137  // Stats returns statistics for the container.
   138  func (c *Container) Stats() (*Stats, error) {
   139  	var (
   140  		err   error
   141  		stats = &Stats{}
   142  	)
   143  	if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
   144  		return stats, fmt.Errorf("unable to get container cgroup stats: %w", err)
   145  	}
   146  	if c.intelRdtManager != nil {
   147  		if stats.IntelRdtStats, err = c.intelRdtManager.GetStats(); err != nil {
   148  			return stats, fmt.Errorf("unable to get container Intel RDT stats: %w", err)
   149  		}
   150  	}
   151  	for _, iface := range c.config.Networks {
   152  		switch iface.Type {
   153  		case "veth":
   154  			istats, err := getNetworkInterfaceStats(iface.HostInterfaceName)
   155  			if err != nil {
   156  				return stats, fmt.Errorf("unable to get network stats for interface %q: %w", iface.HostInterfaceName, err)
   157  			}
   158  			stats.Interfaces = append(stats.Interfaces, istats)
   159  		}
   160  	}
   161  	return stats, nil
   162  }
   163  
   164  // Set resources of container as configured. Can be used to change resources
   165  // when the container is running.
   166  func (c *Container) Set(config configs.Config) error {
   167  	c.m.Lock()
   168  	defer c.m.Unlock()
   169  	status, err := c.currentStatus()
   170  	if err != nil {
   171  		return err
   172  	}
   173  	if status == Stopped {
   174  		return ErrNotRunning
   175  	}
   176  	if err := c.cgroupManager.Set(config.Cgroups.Resources); err != nil {
   177  		// Set configs back
   178  		if err2 := c.cgroupManager.Set(c.config.Cgroups.Resources); err2 != nil {
   179  			logrus.Warnf("Setting back cgroup configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
   180  		}
   181  		return err
   182  	}
   183  	if c.intelRdtManager != nil {
   184  		if err := c.intelRdtManager.Set(&config); err != nil {
   185  			// Set configs back
   186  			if err2 := c.cgroupManager.Set(c.config.Cgroups.Resources); err2 != nil {
   187  				logrus.Warnf("Setting back cgroup configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
   188  			}
   189  			if err2 := c.intelRdtManager.Set(c.config); err2 != nil {
   190  				logrus.Warnf("Setting back intelrdt configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
   191  			}
   192  			return err
   193  		}
   194  	}
   195  	// After config setting succeed, update config and states
   196  	c.config = &config
   197  	_, err = c.updateState(nil)
   198  	return err
   199  }
   200  
   201  // Start starts a process inside the container. Returns error if process fails
   202  // to start. You can track process lifecycle with passed Process structure.
   203  func (c *Container) Start(process *Process) error {
   204  	c.m.Lock()
   205  	defer c.m.Unlock()
   206  	if c.config.Cgroups.Resources.SkipDevices {
   207  		return errors.New("can't start container with SkipDevices set")
   208  	}
   209  	if process.Init {
   210  		if err := c.createExecFifo(); err != nil {
   211  			return err
   212  		}
   213  	}
   214  	if err := c.start(process); err != nil {
   215  		if process.Init {
   216  			c.deleteExecFifo()
   217  		}
   218  		return err
   219  	}
   220  	return nil
   221  }
   222  
   223  // Run immediately starts the process inside the container. Returns an error if
   224  // the process fails to start. It does not block waiting for the exec fifo
   225  // after start returns but opens the fifo after start returns.
   226  func (c *Container) Run(process *Process) error {
   227  	if err := c.Start(process); err != nil {
   228  		return err
   229  	}
   230  	if process.Init {
   231  		return c.exec()
   232  	}
   233  	return nil
   234  }
   235  
   236  // Exec signals the container to exec the users process at the end of the init.
   237  func (c *Container) Exec() error {
   238  	c.m.Lock()
   239  	defer c.m.Unlock()
   240  	return c.exec()
   241  }
   242  
   243  func (c *Container) exec() error {
   244  	path := filepath.Join(c.stateDir, execFifoFilename)
   245  	pid := c.initProcess.pid()
   246  	blockingFifoOpenCh := awaitFifoOpen(path)
   247  	for {
   248  		select {
   249  		case result := <-blockingFifoOpenCh:
   250  			return handleFifoResult(result)
   251  
   252  		case <-time.After(time.Millisecond * 100):
   253  			stat, err := system.Stat(pid)
   254  			if err != nil || stat.State == system.Zombie {
   255  				// could be because process started, ran, and completed between our 100ms timeout and our system.Stat() check.
   256  				// see if the fifo exists and has data (with a non-blocking open, which will succeed if the writing process is complete).
   257  				if err := handleFifoResult(fifoOpen(path, false)); err != nil {
   258  					return errors.New("container process is already dead")
   259  				}
   260  				return nil
   261  			}
   262  		}
   263  	}
   264  }
   265  
   266  func readFromExecFifo(execFifo io.Reader) error {
   267  	data, err := io.ReadAll(execFifo)
   268  	if err != nil {
   269  		return err
   270  	}
   271  	if len(data) <= 0 {
   272  		return errors.New("cannot start an already running container")
   273  	}
   274  	return nil
   275  }
   276  
   277  func awaitFifoOpen(path string) <-chan openResult {
   278  	fifoOpened := make(chan openResult)
   279  	go func() {
   280  		result := fifoOpen(path, true)
   281  		fifoOpened <- result
   282  	}()
   283  	return fifoOpened
   284  }
   285  
   286  func fifoOpen(path string, block bool) openResult {
   287  	flags := os.O_RDONLY
   288  	if !block {
   289  		flags |= unix.O_NONBLOCK
   290  	}
   291  	f, err := os.OpenFile(path, flags, 0)
   292  	if err != nil {
   293  		return openResult{err: fmt.Errorf("exec fifo: %w", err)}
   294  	}
   295  	return openResult{file: f}
   296  }
   297  
   298  func handleFifoResult(result openResult) error {
   299  	if result.err != nil {
   300  		return result.err
   301  	}
   302  	f := result.file
   303  	defer f.Close()
   304  	if err := readFromExecFifo(f); err != nil {
   305  		return err
   306  	}
   307  	return os.Remove(f.Name())
   308  }
   309  
// openResult is the outcome of an attempt to open the exec FIFO, as produced
// by fifoOpen and consumed by handleFifoResult.
type openResult struct {
	// file is the opened FIFO; set when err is nil.
	file *os.File
	// err is the error from opening the FIFO, if any.
	err  error
}
   314  
// start creates the parent process for process and starts it.
//
// The cloned executables attached to process are closed when start returns.
// If the parent process forwards child logs, start waits for the forwarder
// to finish before returning and reports a forwarding error when no other
// error occurred.
//
// For an init process, the container's handle on the exec FIFO is closed
// once the process has been started and, if hooks are configured, the
// Poststart hooks are run; if they fail the process is terminated and the
// hook error is returned.
func (c *Container) start(process *Process) (retErr error) {
	parent, err := c.newParentProcess(process)
	if err != nil {
		return fmt.Errorf("unable to create new parent process: %w", err)
	}
	// We do not need the cloned binaries once the process is spawned.
	defer process.closeClonedExes()

	logsDone := parent.forwardChildLogs()
	if logsDone != nil {
		defer func() {
			// Wait for log forwarder to finish. This depends on
			// runc init closing the _LIBCONTAINER_LOGPIPE log fd.
			err := <-logsDone
			if err != nil && retErr == nil {
				retErr = fmt.Errorf("unable to forward init logs: %w", err)
			}
		}()
	}

	// Before starting "runc init", mark all non-stdio open files as O_CLOEXEC
	// to make sure we don't leak any files into "runc init". Any files to be
	// passed to "runc init" through ExtraFiles will get dup2'd by the Go
	// runtime and thus their O_CLOEXEC flag will be cleared. This is some
	// additional protection against attacks like CVE-2024-21626, by making
	// sure we never leak files to "runc init" we didn't intend to.
	if err := utils.CloseExecFrom(3); err != nil {
		return fmt.Errorf("unable to mark non-stdio fds as cloexec: %w", err)
	}
	if err := parent.start(); err != nil {
		return fmt.Errorf("unable to start container process: %w", err)
	}

	if process.Init {
		// The child has its own copy of the FIFO descriptor by now; the
		// parent's handle (opened by includeExecFifo) is no longer needed.
		c.fifo.Close()
		if c.config.Hooks != nil {
			s, err := c.currentOCIState()
			if err != nil {
				return err
			}

			if err := c.config.Hooks.Run(configs.Poststart, s); err != nil {
				// The inner err shadows the hook error: the warning below
				// logs the failure to terminate the process, while the
				// hook error itself is what gets returned.
				if err := ignoreTerminateErrors(parent.terminate()); err != nil {
					logrus.Warn(fmt.Errorf("error running poststart hook: %w", err))
				}
				return err
			}
		}
	}
	return nil
}
   366  
   367  // Signal sends a specified signal to container's init.
   368  //
   369  // When s is SIGKILL and the container does not have its own PID namespace, all
   370  // the container's processes are killed. In this scenario, the libcontainer
   371  // user may be required to implement a proper child reaper.
   372  func (c *Container) Signal(s os.Signal) error {
   373  	c.m.Lock()
   374  	defer c.m.Unlock()
   375  
   376  	// When a container has its own PID namespace, inside it the init PID
   377  	// is 1, and thus it is handled specially by the kernel. In particular,
   378  	// killing init with SIGKILL from an ancestor namespace will also kill
   379  	// all other processes in that PID namespace (see pid_namespaces(7)).
   380  	//
   381  	// OTOH, if PID namespace is shared, we should kill all pids to avoid
   382  	// leftover processes. Handle this special case here.
   383  	if s == unix.SIGKILL && !c.config.Namespaces.IsPrivate(configs.NEWPID) {
   384  		if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil {
   385  			return fmt.Errorf("unable to kill all processes: %w", err)
   386  		}
   387  		return nil
   388  	}
   389  
   390  	// To avoid a PID reuse attack, don't kill non-running container.
   391  	if !c.hasInit() {
   392  		return ErrNotRunning
   393  	}
   394  	if err := c.initProcess.signal(s); err != nil {
   395  		return fmt.Errorf("unable to signal init: %w", err)
   396  	}
   397  	if s == unix.SIGKILL {
   398  		// For cgroup v1, killing a process in a frozen cgroup
   399  		// does nothing until it's thawed. Only thaw the cgroup
   400  		// for SIGKILL.
   401  		if paused, _ := c.isPaused(); paused {
   402  			_ = c.cgroupManager.Freeze(configs.Thawed)
   403  		}
   404  	}
   405  	return nil
   406  }
   407  
   408  func (c *Container) createExecFifo() error {
   409  	rootuid, err := c.Config().HostRootUID()
   410  	if err != nil {
   411  		return err
   412  	}
   413  	rootgid, err := c.Config().HostRootGID()
   414  	if err != nil {
   415  		return err
   416  	}
   417  
   418  	fifoName := filepath.Join(c.stateDir, execFifoFilename)
   419  	if _, err := os.Stat(fifoName); err == nil {
   420  		return fmt.Errorf("exec fifo %s already exists", fifoName)
   421  	}
   422  	if err := unix.Mkfifo(fifoName, 0o622); err != nil {
   423  		return &os.PathError{Op: "mkfifo", Path: fifoName, Err: err}
   424  	}
   425  	// Ensure permission bits (can be different because of umask).
   426  	if err := os.Chmod(fifoName, 0o622); err != nil {
   427  		return err
   428  	}
   429  	return os.Chown(fifoName, rootuid, rootgid)
   430  }
   431  
   432  func (c *Container) deleteExecFifo() {
   433  	fifoName := filepath.Join(c.stateDir, execFifoFilename)
   434  	os.Remove(fifoName)
   435  }
   436  
   437  // includeExecFifo opens the container's execfifo as a pathfd, so that the
   438  // container cannot access the statedir (and the FIFO itself remains
   439  // un-opened). It then adds the FifoFd to the given exec.Cmd as an inherited
   440  // fd, with _LIBCONTAINER_FIFOFD set to its fd number.
   441  func (c *Container) includeExecFifo(cmd *exec.Cmd) error {
   442  	fifoName := filepath.Join(c.stateDir, execFifoFilename)
   443  	fifo, err := os.OpenFile(fifoName, unix.O_PATH|unix.O_CLOEXEC, 0)
   444  	if err != nil {
   445  		return err
   446  	}
   447  	c.fifo = fifo
   448  
   449  	cmd.ExtraFiles = append(cmd.ExtraFiles, fifo)
   450  	cmd.Env = append(cmd.Env,
   451  		"_LIBCONTAINER_FIFOFD="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1))
   452  	return nil
   453  }
   454  
   455  // No longer needed in Go 1.21.
   456  func slicesContains[S ~[]E, E comparable](slice S, needle E) bool {
   457  	for _, val := range slice {
   458  		if val == needle {
   459  			return true
   460  		}
   461  	}
   462  	return false
   463  }
   464  
   465  func isDmzBinarySafe(c *configs.Config) bool {
   466  	// Because we set the dumpable flag in nsexec, the only time when it is
   467  	// unsafe to use runc-dmz is when the container process would be able to
   468  	// race against "runc init" and bypass the ptrace_may_access() checks.
   469  	//
   470  	// This is only the case if the container processes could have
   471  	// CAP_SYS_PTRACE somehow (i.e. the capability is present in the bounding,
   472  	// inheritable, or ambient sets). Luckily, most containers do not have this
   473  	// capability.
   474  	if c.Capabilities == nil ||
   475  		(!slicesContains(c.Capabilities.Bounding, "CAP_SYS_PTRACE") &&
   476  			!slicesContains(c.Capabilities.Inheritable, "CAP_SYS_PTRACE") &&
   477  			!slicesContains(c.Capabilities.Ambient, "CAP_SYS_PTRACE")) {
   478  		return true
   479  	}
   480  
   481  	// Since Linux 4.10 (see bfedb589252c0) user namespaced containers cannot
   482  	// access /proc/$pid/exe of runc after it joins the namespace (until it
   483  	// does an exec), regardless of the capability set. This has been
   484  	// backported to other distribution kernels, but there's no way of checking
   485  	// this cheaply -- better to be safe than sorry here.
   486  	linux410 := kernelversion.KernelVersion{Kernel: 4, Major: 10}
   487  	if ok, err := kernelversion.GreaterEqualThan(linux410); ok && err == nil {
   488  		if c.Namespaces.Contains(configs.NEWUSER) {
   489  			return true
   490  		}
   491  	}
   492  
   493  	// Assume it's unsafe otherwise.
   494  	return false
   495  }
   496  
// newParentProcess prepares the "runc init" command for p and wraps it in a
// parent process: an init process when p.Init is set, a setns process
// otherwise.
//
// The command is executed from a protected copy of the runc binary (an
// already-cloned /proc/self/exe, the runc-dmz binary, or a fresh clone of
// /proc/self/exe). Every file the child needs is appended to cmd.ExtraFiles
// and its descriptor number in the child is passed through a _LIBCONTAINER_*
// environment variable; the numbers are derived from the position in
// ExtraFiles, so the order of the appends below matters.
//
// NOTE(review): the descriptors created by newProcessComm do not appear to
// be closed on the error returns of this function -- confirm whether that is
// handled elsewhere.
func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
	comm, err := newProcessComm()
	if err != nil {
		return nil, err
	}

	// Make sure we use a new safe copy of /proc/self/exe or the runc-dmz
	// binary each time this is called, to make sure that if a container
	// manages to overwrite the file it cannot affect other containers on the
	// system. For runc, this code will only ever be called once, but
	// libcontainer users might call this more than once.
	p.closeClonedExes()
	var (
		exePath string
		// only one of dmzExe or safeExe are used at a time
		dmzExe, safeExe *os.File
	)
	if dmz.IsSelfExeCloned() {
		// /proc/self/exe is already a cloned binary -- no need to do anything
		logrus.Debug("skipping binary cloning -- /proc/self/exe is already cloned!")
		// We don't need to use /proc/thread-self here because the exe mm of a
		// thread-group is guaranteed to be the same for all threads by
		// definition. This lets us avoid having to do runtime.LockOSThread.
		exePath = "/proc/self/exe"
	} else {
		var err error
		if isDmzBinarySafe(c.config) {
			dmzExe, err = dmz.Binary(c.stateDir)
			if err == nil {
				// We can use our own executable without cloning if we are
				// using runc-dmz. We don't need to use /proc/thread-self here
				// because the exe mm of a thread-group is guaranteed to be the
				// same for all threads by definition. This lets us avoid
				// having to do runtime.LockOSThread.
				exePath = "/proc/self/exe"
				p.clonedExes = append(p.clonedExes, dmzExe)
				logrus.Debug("runc-dmz: using runc-dmz") // used for tests
			} else if errors.Is(err, dmz.ErrNoDmzBinary) {
				logrus.Debug("runc-dmz binary not embedded in runc binary, falling back to /proc/self/exe clone")
			} else if err != nil {
				return nil, fmt.Errorf("failed to create runc-dmz binary clone: %w", err)
			}
		} else {
			// If the configuration makes it unsafe to use runc-dmz, pretend we
			// don't have it embedded so we do /proc/self/exe cloning.
			logrus.Debug("container configuration unsafe for runc-dmz, falling back to /proc/self/exe clone")
			err = dmz.ErrNoDmzBinary
		}
		// Fallback: no usable runc-dmz, so run from a clone of our own binary.
		if errors.Is(err, dmz.ErrNoDmzBinary) {
			safeExe, err = dmz.CloneSelfExe(c.stateDir)
			if err != nil {
				return nil, fmt.Errorf("unable to create safe /proc/self/exe clone for runc init: %w", err)
			}
			exePath = "/proc/self/fd/" + strconv.Itoa(int(safeExe.Fd()))
			p.clonedExes = append(p.clonedExes, safeExe)
			logrus.Debug("runc-dmz: using /proc/self/exe clone") // used for tests
		}
		// Just to make sure we don't run without protection.
		if dmzExe == nil && safeExe == nil {
			// This should never happen.
			return nil, fmt.Errorf("[internal error] attempted to spawn a container with no /proc/self/exe protection")
		}
	}

	cmd := exec.Command(exePath, "init")
	// Present the child under the name this process was invoked with rather
	// than under exePath.
	cmd.Args[0] = os.Args[0]
	cmd.Stdin = p.Stdin
	cmd.Stdout = p.Stdout
	cmd.Stderr = p.Stderr
	cmd.Dir = c.config.Rootfs
	if cmd.SysProcAttr == nil {
		cmd.SysProcAttr = &unix.SysProcAttr{}
	}
	cmd.Env = append(cmd.Env, "GOMAXPROCS="+os.Getenv("GOMAXPROCS"))
	// The caller's extra files come first, so they directly follow the stdio
	// descriptors in the child.
	cmd.ExtraFiles = append(cmd.ExtraFiles, p.ExtraFiles...)
	if p.ConsoleSocket != nil {
		cmd.ExtraFiles = append(cmd.ExtraFiles, p.ConsoleSocket)
		cmd.Env = append(cmd.Env,
			"_LIBCONTAINER_CONSOLE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
		)
	}

	cmd.ExtraFiles = append(cmd.ExtraFiles, comm.initSockChild)
	cmd.Env = append(cmd.Env,
		"_LIBCONTAINER_INITPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
	)
	cmd.ExtraFiles = append(cmd.ExtraFiles, comm.syncSockChild.File())
	cmd.Env = append(cmd.Env,
		"_LIBCONTAINER_SYNCPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
	)

	if dmzExe != nil {
		cmd.ExtraFiles = append(cmd.ExtraFiles, dmzExe)
		cmd.Env = append(cmd.Env,
			"_LIBCONTAINER_DMZEXEFD="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1))
	}

	cmd.ExtraFiles = append(cmd.ExtraFiles, comm.logPipeChild)
	cmd.Env = append(cmd.Env,
		"_LIBCONTAINER_LOGPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1))
	if p.LogLevel != "" {
		cmd.Env = append(cmd.Env, "_LIBCONTAINER_LOGLEVEL="+p.LogLevel)
	}

	if p.PidfdSocket != nil {
		cmd.ExtraFiles = append(cmd.ExtraFiles, p.PidfdSocket)
		cmd.Env = append(cmd.Env,
			"_LIBCONTAINER_PIDFD_SOCK="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
		)
	}

	if safeExe != nil {
		// Due to a Go stdlib bug, we need to add safeExe to the set of
		// ExtraFiles otherwise it is possible for the stdlib to clobber the fd
		// during forkAndExecInChild1 and replace it with some other file that
		// might be malicious. This is less than ideal (because the descriptor
		// will be non-O_CLOEXEC) however we have protections in "runc init" to
		// stop us from leaking extra file descriptors.
		//
		// See <https://github.com/golang/go/issues/61751>.
		cmd.ExtraFiles = append(cmd.ExtraFiles, safeExe)
	}

	// NOTE: when running a container with no PID namespace and the parent
	//       process spawning the container is PID1 the pdeathsig is being
	//       delivered to the container's init process by the kernel for some
	//       reason even with the parent still running.
	if c.config.ParentDeathSignal > 0 {
		cmd.SysProcAttr.Pdeathsig = unix.Signal(c.config.ParentDeathSignal)
	}

	if p.Init {
		// We only set up fifoFd if we're not doing a `runc exec`. The historic
		// reason for this is that previously we would pass a dirfd that allowed
		// for container rootfs escape (and not doing it in `runc exec` avoided
		// that problem), but we no longer do that. However, there's no need to do
		// this for `runc exec` so we just keep it this way to be safe.
		if err := c.includeExecFifo(cmd); err != nil {
			return nil, fmt.Errorf("unable to setup exec fifo: %w", err)
		}
		return c.newInitProcess(p, cmd, comm)
	}
	return c.newSetnsProcess(p, cmd, comm)
}
   641  
   642  func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, comm *processComm) (*initProcess, error) {
   643  	cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
   644  	nsMaps := make(map[configs.NamespaceType]string)
   645  	for _, ns := range c.config.Namespaces {
   646  		if ns.Path != "" {
   647  			nsMaps[ns.Type] = ns.Path
   648  		}
   649  	}
   650  	data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps)
   651  	if err != nil {
   652  		return nil, err
   653  	}
   654  
   655  	init := &initProcess{
   656  		cmd:             cmd,
   657  		comm:            comm,
   658  		manager:         c.cgroupManager,
   659  		intelRdtManager: c.intelRdtManager,
   660  		config:          c.newInitConfig(p),
   661  		container:       c,
   662  		process:         p,
   663  		bootstrapData:   data,
   664  	}
   665  	c.initProcess = init
   666  	return init, nil
   667  }
   668  
   669  func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, comm *processComm) (*setnsProcess, error) {
   670  	cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
   671  	state, err := c.currentState()
   672  	if err != nil {
   673  		return nil, fmt.Errorf("unable to get container state: %w", err)
   674  	}
   675  	// for setns process, we don't have to set cloneflags as the process namespaces
   676  	// will only be set via setns syscall
   677  	data, err := c.bootstrapData(0, state.NamespacePaths)
   678  	if err != nil {
   679  		return nil, err
   680  	}
   681  	proc := &setnsProcess{
   682  		cmd:             cmd,
   683  		cgroupPaths:     state.CgroupPaths,
   684  		rootlessCgroups: c.config.RootlessCgroups,
   685  		intelRdtPath:    state.IntelRdtPath,
   686  		comm:            comm,
   687  		manager:         c.cgroupManager,
   688  		config:          c.newInitConfig(p),
   689  		process:         p,
   690  		bootstrapData:   data,
   691  		initProcessPid:  state.InitProcessPid,
   692  	}
   693  	if len(p.SubCgroupPaths) > 0 {
   694  		if add, ok := p.SubCgroupPaths[""]; ok {
   695  			// cgroup v1: using the same path for all controllers.
   696  			// cgroup v2: the only possible way.
   697  			for k := range proc.cgroupPaths {
   698  				subPath := path.Join(proc.cgroupPaths[k], add)
   699  				if !strings.HasPrefix(subPath, proc.cgroupPaths[k]) {
   700  					return nil, fmt.Errorf("%s is not a sub cgroup path", add)
   701  				}
   702  				proc.cgroupPaths[k] = subPath
   703  			}
   704  			// cgroup v2: do not try to join init process's cgroup
   705  			// as a fallback (see (*setnsProcess).start).
   706  			proc.initProcessPid = 0
   707  		} else {
   708  			// Per-controller paths.
   709  			for ctrl, add := range p.SubCgroupPaths {
   710  				if val, ok := proc.cgroupPaths[ctrl]; ok {
   711  					subPath := path.Join(val, add)
   712  					if !strings.HasPrefix(subPath, val) {
   713  						return nil, fmt.Errorf("%s is not a sub cgroup path", add)
   714  					}
   715  					proc.cgroupPaths[ctrl] = subPath
   716  				} else {
   717  					return nil, fmt.Errorf("unknown controller %s in SubCgroupPaths", ctrl)
   718  				}
   719  			}
   720  		}
   721  	}
   722  	return proc, nil
   723  }
   724  
   725  func (c *Container) newInitConfig(process *Process) *initConfig {
   726  	cfg := &initConfig{
   727  		Config:           c.config,
   728  		Args:             process.Args,
   729  		Env:              process.Env,
   730  		User:             process.User,
   731  		AdditionalGroups: process.AdditionalGroups,
   732  		Cwd:              process.Cwd,
   733  		Capabilities:     process.Capabilities,
   734  		PassedFilesCount: len(process.ExtraFiles),
   735  		ContainerID:      c.ID(),
   736  		NoNewPrivileges:  c.config.NoNewPrivileges,
   737  		RootlessEUID:     c.config.RootlessEUID,
   738  		RootlessCgroups:  c.config.RootlessCgroups,
   739  		AppArmorProfile:  c.config.AppArmorProfile,
   740  		ProcessLabel:     c.config.ProcessLabel,
   741  		Rlimits:          c.config.Rlimits,
   742  		CreateConsole:    process.ConsoleSocket != nil,
   743  		ConsoleWidth:     process.ConsoleWidth,
   744  		ConsoleHeight:    process.ConsoleHeight,
   745  	}
   746  	if process.NoNewPrivileges != nil {
   747  		cfg.NoNewPrivileges = *process.NoNewPrivileges
   748  	}
   749  	if process.AppArmorProfile != "" {
   750  		cfg.AppArmorProfile = process.AppArmorProfile
   751  	}
   752  	if process.Label != "" {
   753  		cfg.ProcessLabel = process.Label
   754  	}
   755  	if len(process.Rlimits) > 0 {
   756  		cfg.Rlimits = process.Rlimits
   757  	}
   758  	if cgroups.IsCgroup2UnifiedMode() {
   759  		cfg.Cgroup2Path = c.cgroupManager.Path("")
   760  	}
   761  
   762  	return cfg
   763  }
   764  
   765  // Destroy destroys the container, if its in a valid state.
   766  //
   767  // Any event registrations are removed before the container is destroyed.
   768  // No error is returned if the container is already destroyed.
   769  //
   770  // Running containers must first be stopped using Signal.
   771  // Paused containers must first be resumed using Resume.
   772  func (c *Container) Destroy() error {
   773  	c.m.Lock()
   774  	defer c.m.Unlock()
   775  	if err := c.state.destroy(); err != nil {
   776  		return fmt.Errorf("unable to destroy container: %w", err)
   777  	}
   778  	return nil
   779  }
   780  
   781  // Pause pauses the container, if its state is RUNNING or CREATED, changing
   782  // its state to PAUSED. If the state is already PAUSED, does nothing.
   783  func (c *Container) Pause() error {
   784  	c.m.Lock()
   785  	defer c.m.Unlock()
   786  	status, err := c.currentStatus()
   787  	if err != nil {
   788  		return err
   789  	}
   790  	switch status {
   791  	case Running, Created:
   792  		if err := c.cgroupManager.Freeze(configs.Frozen); err != nil {
   793  			return err
   794  		}
   795  		return c.state.transition(&pausedState{
   796  			c: c,
   797  		})
   798  	}
   799  	return ErrNotRunning
   800  }
   801  
   802  // Resume resumes the execution of any user processes in the
   803  // container before setting the container state to RUNNING.
   804  // This is only performed if the current state is PAUSED.
   805  // If the Container state is RUNNING, does nothing.
   806  func (c *Container) Resume() error {
   807  	c.m.Lock()
   808  	defer c.m.Unlock()
   809  	status, err := c.currentStatus()
   810  	if err != nil {
   811  		return err
   812  	}
   813  	if status != Paused {
   814  		return ErrNotPaused
   815  	}
   816  	if err := c.cgroupManager.Freeze(configs.Thawed); err != nil {
   817  		return err
   818  	}
   819  	return c.state.transition(&runningState{
   820  		c: c,
   821  	})
   822  }
   823  
   824  // NotifyOOM returns a read-only channel signaling when the container receives
   825  // an OOM notification.
   826  func (c *Container) NotifyOOM() (<-chan struct{}, error) {
   827  	// XXX(cyphar): This requires cgroups.
   828  	if c.config.RootlessCgroups {
   829  		logrus.Warn("getting OOM notifications may fail if you don't have the full access to cgroups")
   830  	}
   831  	path := c.cgroupManager.Path("memory")
   832  	if cgroups.IsCgroup2UnifiedMode() {
   833  		return notifyOnOOMV2(path)
   834  	}
   835  	return notifyOnOOM(path)
   836  }
   837  
   838  // NotifyMemoryPressure returns a read-only channel signaling when the
   839  // container reaches a given pressure level.
   840  func (c *Container) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) {
   841  	// XXX(cyphar): This requires cgroups.
   842  	if c.config.RootlessCgroups {
   843  		logrus.Warn("getting memory pressure notifications may fail if you don't have the full access to cgroups")
   844  	}
   845  	return notifyMemoryPressure(c.cgroupManager.Path("memory"), level)
   846  }
   847  
   848  func (c *Container) updateState(process parentProcess) (*State, error) {
   849  	if process != nil {
   850  		c.initProcess = process
   851  	}
   852  	state, err := c.currentState()
   853  	if err != nil {
   854  		return nil, err
   855  	}
   856  	err = c.saveState(state)
   857  	if err != nil {
   858  		return nil, err
   859  	}
   860  	return state, nil
   861  }
   862  
   863  func (c *Container) saveState(s *State) (retErr error) {
   864  	tmpFile, err := os.CreateTemp(c.stateDir, "state-")
   865  	if err != nil {
   866  		return err
   867  	}
   868  
   869  	defer func() {
   870  		if retErr != nil {
   871  			tmpFile.Close()
   872  			os.Remove(tmpFile.Name())
   873  		}
   874  	}()
   875  
   876  	err = utils.WriteJSON(tmpFile, s)
   877  	if err != nil {
   878  		return err
   879  	}
   880  	err = tmpFile.Close()
   881  	if err != nil {
   882  		return err
   883  	}
   884  
   885  	stateFilePath := filepath.Join(c.stateDir, stateFilename)
   886  	return os.Rename(tmpFile.Name(), stateFilePath)
   887  }
   888  
   889  func (c *Container) currentStatus() (Status, error) {
   890  	if err := c.refreshState(); err != nil {
   891  		return -1, err
   892  	}
   893  	return c.state.status(), nil
   894  }
   895  
   896  // refreshState needs to be called to verify that the current state on the
   897  // container is what is true.  Because consumers of libcontainer can use it
   898  // out of process we need to verify the container's status based on runtime
   899  // information and not rely on our in process info.
   900  func (c *Container) refreshState() error {
   901  	paused, err := c.isPaused()
   902  	if err != nil {
   903  		return err
   904  	}
   905  	if paused {
   906  		return c.state.transition(&pausedState{c: c})
   907  	}
   908  	if !c.hasInit() {
   909  		return c.state.transition(&stoppedState{c: c})
   910  	}
   911  	// The presence of exec fifo helps to distinguish between
   912  	// the created and the running states.
   913  	if _, err := os.Stat(filepath.Join(c.stateDir, execFifoFilename)); err == nil {
   914  		return c.state.transition(&createdState{c: c})
   915  	}
   916  	return c.state.transition(&runningState{c: c})
   917  }
   918  
   919  // hasInit tells whether the container init process exists.
   920  func (c *Container) hasInit() bool {
   921  	if c.initProcess == nil {
   922  		return false
   923  	}
   924  	pid := c.initProcess.pid()
   925  	stat, err := system.Stat(pid)
   926  	if err != nil {
   927  		return false
   928  	}
   929  	if stat.StartTime != c.initProcessStartTime || stat.State == system.Zombie || stat.State == system.Dead {
   930  		return false
   931  	}
   932  	return true
   933  }
   934  
   935  func (c *Container) isPaused() (bool, error) {
   936  	state, err := c.cgroupManager.GetFreezerState()
   937  	if err != nil {
   938  		return false, err
   939  	}
   940  	return state == configs.Frozen, nil
   941  }
   942  
   943  func (c *Container) currentState() (*State, error) {
   944  	var (
   945  		startTime           uint64
   946  		externalDescriptors []string
   947  		pid                 = -1
   948  	)
   949  	if c.initProcess != nil {
   950  		pid = c.initProcess.pid()
   951  		startTime, _ = c.initProcess.startTime()
   952  		externalDescriptors = c.initProcess.externalDescriptors()
   953  	}
   954  
   955  	intelRdtPath := ""
   956  	if c.intelRdtManager != nil {
   957  		intelRdtPath = c.intelRdtManager.GetPath()
   958  	}
   959  	state := &State{
   960  		BaseState: BaseState{
   961  			ID:                   c.ID(),
   962  			Config:               *c.config,
   963  			InitProcessPid:       pid,
   964  			InitProcessStartTime: startTime,
   965  			Created:              c.created,
   966  		},
   967  		Rootless:            c.config.RootlessEUID && c.config.RootlessCgroups,
   968  		CgroupPaths:         c.cgroupManager.GetPaths(),
   969  		IntelRdtPath:        intelRdtPath,
   970  		NamespacePaths:      make(map[configs.NamespaceType]string),
   971  		ExternalDescriptors: externalDescriptors,
   972  	}
   973  	if pid > 0 {
   974  		for _, ns := range c.config.Namespaces {
   975  			state.NamespacePaths[ns.Type] = ns.GetPath(pid)
   976  		}
   977  		for _, nsType := range configs.NamespaceTypes() {
   978  			if !configs.IsNamespaceSupported(nsType) {
   979  				continue
   980  			}
   981  			if _, ok := state.NamespacePaths[nsType]; !ok {
   982  				ns := configs.Namespace{Type: nsType}
   983  				state.NamespacePaths[ns.Type] = ns.GetPath(pid)
   984  			}
   985  		}
   986  	}
   987  	return state, nil
   988  }
   989  
   990  func (c *Container) currentOCIState() (*specs.State, error) {
   991  	bundle, annotations := utils.Annotations(c.config.Labels)
   992  	state := &specs.State{
   993  		Version:     specs.Version,
   994  		ID:          c.ID(),
   995  		Bundle:      bundle,
   996  		Annotations: annotations,
   997  	}
   998  	status, err := c.currentStatus()
   999  	if err != nil {
  1000  		return nil, err
  1001  	}
  1002  	state.Status = specs.ContainerState(status.String())
  1003  	if status != Stopped {
  1004  		if c.initProcess != nil {
  1005  			state.Pid = c.initProcess.pid()
  1006  		}
  1007  	}
  1008  	return state, nil
  1009  }
  1010  
  1011  // orderNamespacePaths sorts namespace paths into a list of paths that we
  1012  // can setns in order.
  1013  func (c *Container) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) {
  1014  	paths := []string{}
  1015  	for _, ns := range configs.NamespaceTypes() {
  1016  
  1017  		// Remove namespaces that we don't need to join.
  1018  		if !c.config.Namespaces.Contains(ns) {
  1019  			continue
  1020  		}
  1021  
  1022  		if p, ok := namespaces[ns]; ok && p != "" {
  1023  			// check if the requested namespace is supported
  1024  			if !configs.IsNamespaceSupported(ns) {
  1025  				return nil, fmt.Errorf("namespace %s is not supported", ns)
  1026  			}
  1027  			// only set to join this namespace if it exists
  1028  			if _, err := os.Lstat(p); err != nil {
  1029  				return nil, fmt.Errorf("namespace path: %w", err)
  1030  			}
  1031  			// do not allow namespace path with comma as we use it to separate
  1032  			// the namespace paths
  1033  			if strings.ContainsRune(p, ',') {
  1034  				return nil, fmt.Errorf("invalid namespace path %s", p)
  1035  			}
  1036  			paths = append(paths, fmt.Sprintf("%s:%s", configs.NsName(ns), p))
  1037  		}
  1038  
  1039  	}
  1040  
  1041  	return paths, nil
  1042  }
  1043  
  1044  func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) {
  1045  	data := bytes.NewBuffer(nil)
  1046  	for _, im := range idMap {
  1047  		line := fmt.Sprintf("%d %d %d\n", im.ContainerID, im.HostID, im.Size)
  1048  		if _, err := data.WriteString(line); err != nil {
  1049  			return nil, err
  1050  		}
  1051  	}
  1052  	return data.Bytes(), nil
  1053  }
  1054  
  1055  // netlinkError is an error wrapper type for use by custom netlink message
  1056  // types. Panics with errors are wrapped in netlinkError so that the recover
  1057  // in bootstrapData can distinguish intentional panics.
  1058  type netlinkError struct{ error }
  1059  
  1060  // bootstrapData encodes the necessary data in netlink binary format
  1061  // as a io.Reader.
  1062  // Consumer can write the data to a bootstrap program
  1063  // such as one that uses nsenter package to bootstrap the container's
  1064  // init process correctly, i.e. with correct namespaces, uid/gid
  1065  // mapping etc.
  1066  func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) (_ io.Reader, Err error) {
  1067  	// create the netlink message
  1068  	r := nl.NewNetlinkRequest(int(InitMsg), 0)
  1069  
  1070  	// Our custom messages cannot bubble up an error using returns, instead
  1071  	// they will panic with the specific error type, netlinkError. In that
  1072  	// case, recover from the panic and return that as an error.
  1073  	defer func() {
  1074  		if r := recover(); r != nil {
  1075  			if e, ok := r.(netlinkError); ok {
  1076  				Err = e.error
  1077  			} else {
  1078  				panic(r)
  1079  			}
  1080  		}
  1081  	}()
  1082  
  1083  	// write cloneFlags
  1084  	r.AddData(&Int32msg{
  1085  		Type:  CloneFlagsAttr,
  1086  		Value: uint32(cloneFlags),
  1087  	})
  1088  
  1089  	// write custom namespace paths
  1090  	if len(nsMaps) > 0 {
  1091  		nsPaths, err := c.orderNamespacePaths(nsMaps)
  1092  		if err != nil {
  1093  			return nil, err
  1094  		}
  1095  		r.AddData(&Bytemsg{
  1096  			Type:  NsPathsAttr,
  1097  			Value: []byte(strings.Join(nsPaths, ",")),
  1098  		})
  1099  	}
  1100  
  1101  	// write namespace paths only when we are not joining an existing user ns
  1102  	_, joinExistingUser := nsMaps[configs.NEWUSER]
  1103  	if !joinExistingUser {
  1104  		// write uid mappings
  1105  		if len(c.config.UIDMappings) > 0 {
  1106  			if c.config.RootlessEUID {
  1107  				// We resolve the paths for new{u,g}idmap from
  1108  				// the context of runc to avoid doing a path
  1109  				// lookup in the nsexec context.
  1110  				if path, err := execabs.LookPath("newuidmap"); err == nil {
  1111  					r.AddData(&Bytemsg{
  1112  						Type:  UidmapPathAttr,
  1113  						Value: []byte(path),
  1114  					})
  1115  				}
  1116  			}
  1117  			b, err := encodeIDMapping(c.config.UIDMappings)
  1118  			if err != nil {
  1119  				return nil, err
  1120  			}
  1121  			r.AddData(&Bytemsg{
  1122  				Type:  UidmapAttr,
  1123  				Value: b,
  1124  			})
  1125  		}
  1126  
  1127  		// write gid mappings
  1128  		if len(c.config.GIDMappings) > 0 {
  1129  			b, err := encodeIDMapping(c.config.GIDMappings)
  1130  			if err != nil {
  1131  				return nil, err
  1132  			}
  1133  			r.AddData(&Bytemsg{
  1134  				Type:  GidmapAttr,
  1135  				Value: b,
  1136  			})
  1137  			if c.config.RootlessEUID {
  1138  				if path, err := execabs.LookPath("newgidmap"); err == nil {
  1139  					r.AddData(&Bytemsg{
  1140  						Type:  GidmapPathAttr,
  1141  						Value: []byte(path),
  1142  					})
  1143  				}
  1144  			}
  1145  			if requiresRootOrMappingTool(c.config) {
  1146  				r.AddData(&Boolmsg{
  1147  					Type:  SetgroupAttr,
  1148  					Value: true,
  1149  				})
  1150  			}
  1151  		}
  1152  	}
  1153  
  1154  	if c.config.OomScoreAdj != nil {
  1155  		// write oom_score_adj
  1156  		r.AddData(&Bytemsg{
  1157  			Type:  OomScoreAdjAttr,
  1158  			Value: []byte(strconv.Itoa(*c.config.OomScoreAdj)),
  1159  		})
  1160  	}
  1161  
  1162  	// write rootless
  1163  	r.AddData(&Boolmsg{
  1164  		Type:  RootlessEUIDAttr,
  1165  		Value: c.config.RootlessEUID,
  1166  	})
  1167  
  1168  	// write boottime and monotonic time ns offsets.
  1169  	if c.config.TimeOffsets != nil {
  1170  		var offsetSpec bytes.Buffer
  1171  		for clock, offset := range c.config.TimeOffsets {
  1172  			fmt.Fprintf(&offsetSpec, "%s %d %d\n", clock, offset.Secs, offset.Nanosecs)
  1173  		}
  1174  		r.AddData(&Bytemsg{
  1175  			Type:  TimeOffsetsAttr,
  1176  			Value: offsetSpec.Bytes(),
  1177  		})
  1178  	}
  1179  
  1180  	return bytes.NewReader(r.Serialize()), nil
  1181  }
  1182  
  1183  // ignoreTerminateErrors returns nil if the given err matches an error known
  1184  // to indicate that the terminate occurred successfully or err was nil, otherwise
  1185  // err is returned unaltered.
  1186  func ignoreTerminateErrors(err error) error {
  1187  	if err == nil {
  1188  		return nil
  1189  	}
  1190  	// terminate() might return an error from either Kill or Wait.
  1191  	// The (*Cmd).Wait documentation says: "If the command fails to run
  1192  	// or doesn't complete successfully, the error is of type *ExitError".
  1193  	// Filter out such errors (like "exit status 1" or "signal: killed").
  1194  	var exitErr *exec.ExitError
  1195  	if errors.As(err, &exitErr) {
  1196  		return nil
  1197  	}
  1198  	if errors.Is(err, os.ErrProcessDone) {
  1199  		return nil
  1200  	}
  1201  	s := err.Error()
  1202  	if strings.Contains(s, "Wait was already called") {
  1203  		return nil
  1204  	}
  1205  	return err
  1206  }
  1207  
  1208  func requiresRootOrMappingTool(c *configs.Config) bool {
  1209  	gidMap := []configs.IDMap{
  1210  		{ContainerID: 0, HostID: int64(os.Getegid()), Size: 1},
  1211  	}
  1212  	return !reflect.DeepEqual(c.GIDMappings, gidMap)
  1213  }