github.com/ttpreport/gvisor-ligolo@v0.0.0-20240123134145-a858404967ba/runsc/cmd/boot.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package cmd
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"io/ioutil"
    21  	"os"
    22  	"os/exec"
    23  	"path/filepath"
    24  	"runtime"
    25  	"runtime/debug"
    26  	"strconv"
    27  	"strings"
    28  
    29  	"github.com/google/subcommands"
    30  	specs "github.com/opencontainers/runtime-spec/specs-go"
    31  	"github.com/ttpreport/gvisor-ligolo/pkg/coretag"
    32  	"github.com/ttpreport/gvisor-ligolo/pkg/cpuid"
    33  	"github.com/ttpreport/gvisor-ligolo/pkg/log"
    34  	"github.com/ttpreport/gvisor-ligolo/pkg/metric"
    35  	"github.com/ttpreport/gvisor-ligolo/pkg/ring0"
    36  	"github.com/ttpreport/gvisor-ligolo/pkg/sentry/platform"
    37  	"github.com/ttpreport/gvisor-ligolo/runsc/boot"
    38  	"github.com/ttpreport/gvisor-ligolo/runsc/cmd/util"
    39  	"github.com/ttpreport/gvisor-ligolo/runsc/config"
    40  	"github.com/ttpreport/gvisor-ligolo/runsc/flag"
    41  	"github.com/ttpreport/gvisor-ligolo/runsc/profile"
    42  	"github.com/ttpreport/gvisor-ligolo/runsc/specutils"
    43  	"golang.org/x/sys/unix"
    44  )
    45  
    46  // Note that directfsSandboxCaps is the same as caps defined in gofer.go
    47  // except CAP_SYS_CHROOT because we don't need to chroot in directfs mode.
    48  var directfsSandboxCaps = []string{
    49  	"CAP_CHOWN",
    50  	"CAP_DAC_OVERRIDE",
    51  	"CAP_DAC_READ_SEARCH",
    52  	"CAP_FOWNER",
    53  	"CAP_FSETID",
    54  }
    55  
    56  // directfsSandboxLinuxCaps is the minimal set of capabilities needed by the
    57  // sandbox to operate on files in directfs mode.
    58  var directfsSandboxLinuxCaps = &specs.LinuxCapabilities{
    59  	Bounding:  directfsSandboxCaps,
    60  	Effective: directfsSandboxCaps,
    61  	Permitted: directfsSandboxCaps,
    62  }
    63  
    64  // Boot implements subcommands.Command for the "boot" command which starts a
    65  // new sandbox. It should not be called directly.
    66  type Boot struct {
    67  	// bundleDir is the directory containing the OCI spec.
    68  	bundleDir string
    69  
    70  	// specFD is the file descriptor that the spec will be read from.
    71  	specFD int
    72  
    73  	// controllerFD is the file descriptor of a stream socket for the
    74  	// control server that is donated to this process.
    75  	controllerFD int
    76  
    77  	// deviceFD is the file descriptor for the platform device file.
    78  	deviceFD int
    79  
    80  	// ioFDs is the list of FDs used to connect to FS gofers.
    81  	ioFDs intFlags
    82  
    83  	// overlayFilestoreFDs are FDs to the regular files that will back the tmpfs
    84  	// upper mount in the overlay mounts.
    85  	overlayFilestoreFDs intFlags
    86  
    87  	// overlayMediums contains information about how the gofer mounts have been
    88  	// overlaid. The first entry is for rootfs and the following entries are for
    89  	// bind mounts in Spec.Mounts (in the same order).
    90  	overlayMediums boot.OverlayMediumFlags
    91  
    92  	// stdioFDs are the fds for stdin, stdout, and stderr. They must be
    93  	// provided in that order.
    94  	stdioFDs intFlags
    95  
    96  	// passFDs are mappings of user-supplied host to guest file descriptors.
    97  	passFDs fdMappings
    98  
    99  	// execFD is the host file descriptor used for program execution.
   100  	execFD int
   101  
   102  	// applyCaps determines if capabilities defined in the spec should be applied
   103  	// to the process.
   104  	applyCaps bool
   105  
   106  	// setUpChroot is set to true if the sandbox is started in an empty root.
   107  	setUpRoot bool
   108  
   109  	// cpuNum number of CPUs to create inside the sandbox.
   110  	cpuNum int
   111  
   112  	// totalMem sets the initial amount of total memory to report back to the
   113  	// container.
   114  	totalMem uint64
   115  
   116  	// totalHostMem is the total memory reported by host /proc/meminfo.
   117  	totalHostMem uint64
   118  
   119  	// userLogFD is the file descriptor to write user logs to.
   120  	userLogFD int
   121  
   122  	// startSyncFD is the file descriptor to synchronize runsc and sandbox.
   123  	startSyncFD int
   124  
   125  	// mountsFD is the file descriptor to read list of mounts after they have
   126  	// been resolved (direct paths, no symlinks). They are resolved outside the
   127  	// sandbox (e.g. gofer) and sent through this FD.
   128  	mountsFD int
   129  
   130  	podInitConfigFD int
   131  
   132  	sinkFDs intFlags
   133  
   134  	// pidns is set if the sandbox is in its own pid namespace.
   135  	pidns bool
   136  
   137  	// attached is set to true to kill the sandbox process when the parent process
   138  	// terminates. This flag is set when the command execve's itself because
   139  	// parent death signal doesn't propagate through execve when uid/gid changes.
   140  	attached bool
   141  
   142  	// productName is the value to show in
   143  	// /sys/devices/virtual/dmi/id/product_name.
   144  	productName string
   145  
   146  	// FDs for profile data.
   147  	profileFDs profile.FDArgs
   148  
   149  	// procMountSyncFD is a file descriptor that has to be closed when the
   150  	// procfs mount isn't needed anymore.
   151  	procMountSyncFD int
   152  
   153  	// syncUsernsFD is the file descriptor that has to be closed when the
   154  	// boot process should invoke setuid/setgid for root user. This is mainly
   155  	// used to synchronize rootless user namespace initialization.
   156  	syncUsernsFD int
   157  }
   158  
   159  // Name implements subcommands.Command.Name.
   160  func (*Boot) Name() string {
   161  	return "boot"
   162  }
   163  
   164  // Synopsis implements subcommands.Command.Synopsis.
   165  func (*Boot) Synopsis() string {
   166  	return "launch a sandbox process"
   167  }
   168  
   169  // Usage implements subcommands.Command.Usage.
   170  func (*Boot) Usage() string {
   171  	return `boot [flags] <container id>`
   172  }
   173  
   174  // SetFlags implements subcommands.Command.SetFlags.
   175  func (b *Boot) SetFlags(f *flag.FlagSet) {
   176  	f.StringVar(&b.bundleDir, "bundle", "", "required path to the root of the bundle directory")
   177  	f.BoolVar(&b.applyCaps, "apply-caps", false, "if true, apply capabilities defined in the spec to the process")
   178  	f.BoolVar(&b.setUpRoot, "setup-root", false, "if true, set up an empty root for the process")
   179  	f.BoolVar(&b.pidns, "pidns", false, "if true, the sandbox is in its own PID namespace")
   180  	f.IntVar(&b.cpuNum, "cpu-num", 0, "number of CPUs to create inside the sandbox")
   181  	f.IntVar(&b.procMountSyncFD, "proc-mount-sync-fd", -1, "file descriptor that has to be written to when /proc isn't needed anymore and can be unmounted")
   182  	f.IntVar(&b.syncUsernsFD, "sync-userns-fd", -1, "file descriptor used to synchronize rootless user namespace initialization.")
   183  	f.Uint64Var(&b.totalMem, "total-memory", 0, "sets the initial amount of total memory to report back to the container")
   184  	f.Uint64Var(&b.totalHostMem, "total-host-memory", 0, "total memory reported by host /proc/meminfo")
   185  	f.BoolVar(&b.attached, "attached", false, "if attached is true, kills the sandbox process when the parent process terminates")
   186  	f.StringVar(&b.productName, "product-name", "", "value to show in /sys/devices/virtual/dmi/id/product_name")
   187  
   188  	// Open FDs that are donated to the sandbox.
   189  	f.IntVar(&b.specFD, "spec-fd", -1, "required fd with the container spec")
   190  	f.IntVar(&b.controllerFD, "controller-fd", -1, "required FD of a stream socket for the control server that must be donated to this process")
   191  	f.IntVar(&b.deviceFD, "device-fd", -1, "FD for the platform device file")
   192  	f.Var(&b.ioFDs, "io-fds", "list of FDs to connect gofer clients. They must follow this order: root first, then mounts as defined in the spec")
   193  	f.Var(&b.stdioFDs, "stdio-fds", "list of FDs containing sandbox stdin, stdout, and stderr in that order")
   194  	f.Var(&b.passFDs, "pass-fd", "mapping of host to guest FDs. They must be in M:N format. M is the host and N the guest descriptor.")
   195  	f.IntVar(&b.execFD, "exec-fd", -1, "host file descriptor used for program execution.")
   196  	f.Var(&b.overlayFilestoreFDs, "overlay-filestore-fds", "FDs to the regular files that will back the tmpfs upper mount in the overlay mounts.")
   197  	f.Var(&b.overlayMediums, "overlay-mediums", "information about how the gofer mounts have been overlaid.")
   198  	f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.")
   199  	f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup")
   200  	f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).")
   201  	f.IntVar(&b.podInitConfigFD, "pod-init-config-fd", -1, "file descriptor to the pod init configuration file.")
   202  	f.Var(&b.sinkFDs, "sink-fds", "ordered list of file descriptors to be used by the sinks defined in --pod-init-config.")
   203  
   204  	// Profiling flags.
   205  	b.profileFDs.SetFromFlags(f)
   206  }
   207  
   208  // Execute implements subcommands.Command.Execute.  It starts a sandbox in a
   209  // waiting state.
   210  func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...any) subcommands.ExitStatus {
   211  	if b.specFD == -1 || b.controllerFD == -1 || b.startSyncFD == -1 || f.NArg() != 1 {
   212  		f.Usage()
   213  		return subcommands.ExitUsageError
   214  	}
   215  
   216  	conf := args[0].(*config.Config)
   217  
   218  	// Set traceback level
   219  	debug.SetTraceback(conf.Traceback)
   220  
   221  	// Initialize CPUID information.
   222  	cpuid.Initialize()
   223  
   224  	// Initialize ring0 library.
   225  	ring0.InitDefault()
   226  
   227  	if len(b.productName) == 0 {
   228  		// Do this before chroot takes effect, otherwise we can't read /sys.
   229  		if product, err := ioutil.ReadFile("/sys/devices/virtual/dmi/id/product_name"); err != nil {
   230  			log.Warningf("Not setting product_name: %v", err)
   231  		} else {
   232  			b.productName = strings.TrimSpace(string(product))
   233  			log.Infof("Setting product_name: %q", b.productName)
   234  		}
   235  	}
   236  
   237  	if b.attached {
   238  		// Ensure this process is killed after parent process terminates when
   239  		// attached mode is enabled. In the unfortunate event that the parent
   240  		// terminates before this point, this process leaks.
   241  		if err := unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0); err != nil {
   242  			util.Fatalf("error setting parent death signal: %v", err)
   243  		}
   244  	}
   245  
   246  	syncUsernsForRootless(b.syncUsernsFD)
   247  
   248  	// Get the spec from the specFD. We *must* keep this os.File alive past
   249  	// the call setCapsAndCallSelf, otherwise the FD will be closed and the
   250  	// child process cannot read it
   251  	specFile := os.NewFile(uintptr(b.specFD), "spec file")
   252  	spec, err := specutils.ReadSpecFromFile(b.bundleDir, specFile, conf)
   253  	if err != nil {
   254  		util.Fatalf("reading spec: %v", err)
   255  	}
   256  
   257  	if b.setUpRoot {
   258  		if err := setUpChroot(b.pidns, spec, conf); err != nil {
   259  			util.Fatalf("error setting up chroot: %v", err)
   260  		}
   261  
   262  		if !conf.Rootless {
   263  			// /proc is umounted from a forked process, because the
   264  			// current one is going to re-execute itself without
   265  			// capabilities.
   266  			cmd, w := execProcUmounter()
   267  			defer cmd.Wait()
   268  			defer w.Close()
   269  			if b.procMountSyncFD != -1 {
   270  				panic("procMountSyncFD is set")
   271  			}
   272  			b.procMountSyncFD = int(w.Fd())
   273  
   274  			// Clear FD_CLOEXEC. Regardless of b.applyCaps, this process will be
   275  			// re-executed. procMountSyncFD should remain open.
   276  			if _, _, errno := unix.RawSyscall(unix.SYS_FCNTL, w.Fd(), unix.F_SETFD, 0); errno != 0 {
   277  				util.Fatalf("error clearing CLOEXEC: %v", errno)
   278  			}
   279  
   280  			if !b.applyCaps {
   281  				// Remove the args that have already been done before calling self.
   282  				args := b.prepareArgs("setup-root", "sync-userns-fd")
   283  
   284  				// Note that we've already read the spec from the spec FD, and
   285  				// we will read it again after the exec call. This works
   286  				// because the ReadSpecFromFile function seeks to the beginning
   287  				// of the file before reading.
   288  				util.Fatalf("callSelfAsNobody(%v): %v", args, callSelfAsNobody(args))
   289  				panic("unreachable")
   290  			}
   291  		}
   292  	}
   293  
   294  	specutils.LogSpecDebug(spec, conf.OCISeccomp)
   295  
   296  	if b.applyCaps {
   297  		caps := spec.Process.Capabilities
   298  		if caps == nil {
   299  			caps = &specs.LinuxCapabilities{}
   300  		}
   301  
   302  		gPlatform, err := platform.Lookup(conf.Platform)
   303  		if err != nil {
   304  			util.Fatalf("loading platform: %v", err)
   305  		}
   306  		if gPlatform.Requirements().RequiresCapSysPtrace {
   307  			// Ptrace platform requires extra capabilities.
   308  			const c = "CAP_SYS_PTRACE"
   309  			caps.Bounding = append(caps.Bounding, c)
   310  			caps.Effective = append(caps.Effective, c)
   311  			caps.Permitted = append(caps.Permitted, c)
   312  		}
   313  
   314  		if conf.DirectFS {
   315  			caps = specutils.MergeCapabilities(caps, directfsSandboxLinuxCaps)
   316  		}
   317  
   318  		// Remove the args that have already been done before calling self.
   319  		args := b.prepareArgs("setup-root", "sync-userns-fd", "apply-caps")
   320  
   321  		// Note that we've already read the spec from the spec FD, and
   322  		// we will read it again after the exec call. This works
   323  		// because the ReadSpecFromFile function seeks to the beginning
   324  		// of the file before reading.
   325  		util.Fatalf("setCapsAndCallSelf(%v, %v): %v", args, caps, setCapsAndCallSelf(args, caps))
   326  
   327  		// This prevents the specFile finalizer from running and closed
   328  		// the specFD, which we have passed to ourselves when
   329  		// re-execing.
   330  		runtime.KeepAlive(specFile)
   331  		panic("unreachable")
   332  	}
   333  
   334  	if b.syncUsernsFD >= 0 {
   335  		// syncUsernsFD is set, but runsc hasn't been re-executed with a new UID and GID.
   336  		// We expect that setCapsAndCallSelf has to be called in this case.
   337  		panic("unreachable")
   338  	}
   339  
   340  	// Close specFile to avoid exposing it to the sandbox.
   341  	if err := specFile.Close(); err != nil {
   342  		util.Fatalf("closing specFile: %v", err)
   343  	}
   344  
   345  	// At this point we won't re-execute, so it's safe to limit via rlimits. Any
   346  	// limit >= 0 works. If the limit is lower than the current number of open
   347  	// files, then Setrlimit will succeed, and the next open will fail.
   348  	if conf.FDLimit > -1 {
   349  		rlimit := unix.Rlimit{
   350  			Cur: uint64(conf.FDLimit),
   351  			Max: uint64(conf.FDLimit),
   352  		}
   353  		switch err := unix.Setrlimit(unix.RLIMIT_NOFILE, &rlimit); err {
   354  		case nil:
   355  		case unix.EPERM:
   356  			log.Warningf("FD limit %d is higher than the current hard limit or system-wide maximum", conf.FDLimit)
   357  		default:
   358  			util.Fatalf("Failed to set RLIMIT_NOFILE: %v", err)
   359  		}
   360  	}
   361  
   362  	// Read resolved mount list and replace the original one from the spec.
   363  	mountsFile := os.NewFile(uintptr(b.mountsFD), "mounts file")
   364  	cleanMounts, err := specutils.ReadMounts(mountsFile)
   365  	if err != nil {
   366  		mountsFile.Close()
   367  		util.Fatalf("Error reading mounts file: %v", err)
   368  	}
   369  	mountsFile.Close()
   370  	spec.Mounts = cleanMounts
   371  
   372  	if conf.DirectFS {
   373  		// sandbox should run with a umask of 0, because we want to preserve file
   374  		// modes exactly as sent by the sentry, which would have already applied
   375  		// the application umask.
   376  		unix.Umask(0)
   377  	}
   378  
   379  	if conf.EnableCoreTags {
   380  		if err := coretag.Enable(); err != nil {
   381  			util.Fatalf("Failed to core tag sentry: %v", err)
   382  		}
   383  
   384  		// Verify that all sentry threads are properly core tagged, and log
   385  		// current core tag.
   386  		coreTags, err := coretag.GetAllCoreTags(os.Getpid())
   387  		if err != nil {
   388  			util.Fatalf("Failed read current core tags: %v", err)
   389  		}
   390  		if len(coreTags) != 1 {
   391  			util.Fatalf("Not all child threads were core tagged the same. Tags=%v", coreTags)
   392  		}
   393  		log.Infof("Core tag enabled (core tag=%d)", coreTags[0])
   394  	}
   395  
   396  	// Create the loader.
   397  	bootArgs := boot.Args{
   398  		ID:                  f.Arg(0),
   399  		Spec:                spec,
   400  		Conf:                conf,
   401  		ControllerFD:        b.controllerFD,
   402  		Device:              os.NewFile(uintptr(b.deviceFD), "platform device"),
   403  		GoferFDs:            b.ioFDs.GetArray(),
   404  		StdioFDs:            b.stdioFDs.GetArray(),
   405  		PassFDs:             b.passFDs.GetArray(),
   406  		ExecFD:              b.execFD,
   407  		OverlayFilestoreFDs: b.overlayFilestoreFDs.GetArray(),
   408  		OverlayMediums:      b.overlayMediums.GetArray(),
   409  		NumCPU:              b.cpuNum,
   410  		TotalMem:            b.totalMem,
   411  		TotalHostMem:        b.totalHostMem,
   412  		UserLogFD:           b.userLogFD,
   413  		ProductName:         b.productName,
   414  		PodInitConfigFD:     b.podInitConfigFD,
   415  		SinkFDs:             b.sinkFDs.GetArray(),
   416  		ProfileOpts:         b.profileFDs.ToOpts(),
   417  	}
   418  	l, err := boot.New(bootArgs)
   419  	if err != nil {
   420  		util.Fatalf("creating loader: %v", err)
   421  	}
   422  
   423  	// Fatalf exits the process and doesn't run defers.
   424  	// 'l' must be destroyed explicitly after this point!
   425  
   426  	if b.procMountSyncFD != -1 {
   427  		l.PreSeccompCallback = func() {
   428  			// Call validateOpenFDs() before umounting /proc.
   429  			validateOpenFDs(bootArgs.PassFDs)
   430  			// Umount /proc right before installing seccomp filters.
   431  			umountProc(b.procMountSyncFD)
   432  		}
   433  	}
   434  
   435  	// Prepare metrics.
   436  	// This needs to happen after the kernel is initialized (such that all metrics are registered)
   437  	// but before the start-sync file is notified, as the parent process needs to query for
   438  	// registered metrics prior to sending the start signal.
   439  	metric.Initialize()
   440  
   441  	// Notify the parent process the sandbox has booted (and that the controller
   442  	// is up).
   443  	startSyncFile := os.NewFile(uintptr(b.startSyncFD), "start-sync file")
   444  	buf := make([]byte, 1)
   445  	if w, err := startSyncFile.Write(buf); err != nil || w != 1 {
   446  		l.Destroy()
   447  		util.Fatalf("unable to write into the start-sync descriptor: %v", err)
   448  	}
   449  	// Closes startSyncFile because 'l.Run()' only returns when the sandbox exits.
   450  	startSyncFile.Close()
   451  
   452  	// Wait for the start signal from runsc.
   453  	l.WaitForStartSignal()
   454  
   455  	// Run the application and wait for it to finish.
   456  	if err := l.Run(); err != nil {
   457  		l.Destroy()
   458  		util.Fatalf("running sandbox: %v", err)
   459  	}
   460  
   461  	ws := l.WaitExit()
   462  	log.Infof("application exiting with %+v", ws)
   463  	waitStatus := args[1].(*unix.WaitStatus)
   464  	*waitStatus = unix.WaitStatus(ws)
   465  	l.Destroy()
   466  	return subcommands.ExitSuccess
   467  }
   468  
   469  func (b *Boot) prepareArgs(exclude ...string) []string {
   470  	var args []string
   471  	for _, arg := range os.Args {
   472  		for _, excl := range exclude {
   473  			if strings.Contains(arg, excl) {
   474  				goto skip
   475  			}
   476  		}
   477  		args = append(args, arg)
   478  		// Some parameters are not already part of os.Args because they are
   479  		// solely configured by Boot.Execute(). Strategically add these parameters
   480  		// after the command and before the container ID at the end.
   481  		if arg == "boot" {
   482  			if b.procMountSyncFD != -1 {
   483  				args = append(args, fmt.Sprintf("--proc-mount-sync-fd=%d", b.procMountSyncFD))
   484  			}
   485  			if len(b.productName) > 0 {
   486  				args = append(args, "--product-name", b.productName)
   487  			}
   488  		}
   489  	skip:
   490  	}
   491  	return args
   492  }
   493  
   494  // execProcUmounter execute a child process that umounts /proc when the
   495  // returned pipe is closed.
   496  func execProcUmounter() (*exec.Cmd, *os.File) {
   497  	r, w, err := os.Pipe()
   498  	if err != nil {
   499  		util.Fatalf("error creating a pipe: %v", err)
   500  	}
   501  	defer r.Close()
   502  
   503  	cmd := exec.Command(specutils.ExePath)
   504  	cmd.Args = append(cmd.Args, "umount", "--sync-fd=3", "/proc")
   505  	cmd.ExtraFiles = append(cmd.ExtraFiles, r)
   506  	cmd.Stdin = os.Stdin
   507  	cmd.Stdout = os.Stdout
   508  	cmd.Stderr = os.Stderr
   509  	if err := cmd.Start(); err != nil {
   510  		util.Fatalf("error executing umounter: %v", err)
   511  	}
   512  	return cmd, w
   513  }
   514  
   515  // umountProc writes to syncFD signalling the process started by
   516  // execProcUmounter() to umount /proc.
   517  func umountProc(syncFD int) {
   518  	syncFile := os.NewFile(uintptr(syncFD), "procfs umount sync FD")
   519  	buf := make([]byte, 1)
   520  	if w, err := syncFile.Write(buf); err != nil || w != 1 {
   521  		util.Fatalf("unable to write into the proc umounter descriptor: %v", err)
   522  	}
   523  	syncFile.Close()
   524  
   525  	var waitStatus unix.WaitStatus
   526  	if _, err := unix.Wait4(0, &waitStatus, 0, nil); err != nil {
   527  		util.Fatalf("error waiting for the proc umounter process: %v", err)
   528  	}
   529  	if !waitStatus.Exited() || waitStatus.ExitStatus() != 0 {
   530  		util.Fatalf("the proc umounter process failed: %v", waitStatus)
   531  	}
   532  	if err := unix.Access("/proc/self", unix.F_OK); err != unix.ENOENT {
   533  		util.Fatalf("/proc is still accessible")
   534  	}
   535  }
   536  
   537  // validateOpenFDs checks that the sandbox process does not have any open
   538  // directory FDs.
   539  func validateOpenFDs(passFDs []boot.FDMapping) {
   540  	passHostFDs := make(map[int]struct{})
   541  	for _, passFD := range passFDs {
   542  		passHostFDs[passFD.Host] = struct{}{}
   543  	}
   544  	const selfFDDir = "/proc/self/fd"
   545  	if err := filepath.WalkDir(selfFDDir, func(path string, d os.DirEntry, err error) error {
   546  		if err != nil {
   547  			return err
   548  		}
   549  		if d.Type() != os.ModeSymlink {
   550  			// All entries are symlinks. Ignore the callback for fd directory itself.
   551  			return nil
   552  		}
   553  		if fdInfo, err := os.Stat(path); err != nil {
   554  			if os.IsNotExist(err) {
   555  				// Ignore FDs that are now closed. For example, the FD to selfFDDir that
   556  				// was opened by filepath.WalkDir() to read dirents.
   557  				return nil
   558  			}
   559  			return fmt.Errorf("os.Stat(%s) failed: %v", path, err)
   560  		} else if !fdInfo.IsDir() {
   561  			return nil
   562  		}
   563  		// Uh-oh. This is a directory FD.
   564  		fdNo, err := strconv.Atoi(d.Name())
   565  		if err != nil {
   566  			return fmt.Errorf("strconv.Atoi(%s) failed: %v", d.Name(), err)
   567  		}
   568  		dirLink, err := os.Readlink(path)
   569  		if err != nil {
   570  			return fmt.Errorf("os.Readlink(%s) failed: %v", path, err)
   571  		}
   572  		if _, ok := passHostFDs[fdNo]; ok {
   573  			// Passed FDs are allowed to be directories. The user must be knowing
   574  			// what they are doing. Log a warning regardless.
   575  			log.Warningf("Sandbox has access to FD %d, which is a directory for %s", fdNo, dirLink)
   576  			return nil
   577  		}
   578  		return fmt.Errorf("FD %d is a directory for %s", fdNo, dirLink)
   579  	}); err != nil {
   580  		util.Fatalf("WalkDir(%s) failed: %v", selfFDDir, err)
   581  	}
   582  }