github.com/rootless-containers/rootlesskit/v2@v2.3.4/pkg/child/child.go (about)

     1  package child
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"os"
     8  	"os/exec"
     9  	"os/signal"
    10  	"path/filepath"
    11  	"runtime"
    12  	"strconv"
    13  	"syscall"
    14  	"time"
    15  
    16  	"github.com/containernetworking/plugins/pkg/ns"
    17  	"github.com/rootless-containers/rootlesskit/v2/pkg/common"
    18  	"github.com/rootless-containers/rootlesskit/v2/pkg/copyup"
    19  	"github.com/rootless-containers/rootlesskit/v2/pkg/messages"
    20  	"github.com/rootless-containers/rootlesskit/v2/pkg/network"
    21  	"github.com/rootless-containers/rootlesskit/v2/pkg/port"
    22  	"github.com/rootless-containers/rootlesskit/v2/pkg/sigproxy"
    23  	sigproxysignal "github.com/rootless-containers/rootlesskit/v2/pkg/sigproxy/signal"
    24  	"github.com/sirupsen/logrus"
    25  	"golang.org/x/sys/unix"
    26  )
    27  
    28  var propagationStates = map[string]uintptr{
    29  	"private":  uintptr(unix.MS_PRIVATE),
    30  	"rprivate": uintptr(unix.MS_REC | unix.MS_PRIVATE),
    31  	"shared":   uintptr(unix.MS_SHARED),
    32  	"rshared":  uintptr(unix.MS_REC | unix.MS_SHARED),
    33  	"slave":    uintptr(unix.MS_SLAVE),
    34  	"rslave":   uintptr(unix.MS_REC | unix.MS_SLAVE),
    35  }
    36  
    37  func setupFiles(cmd *exec.Cmd) {
    38  	// 0 1 and 2  are used for stdin. stdout, and stderr
    39  	const firstExtraFD = 3
    40  	systemdActivationFDs := 0
    41  	// check for systemd socket activation sockets
    42  	if v := os.Getenv("LISTEN_FDS"); v != "" {
    43  		if num, err := strconv.Atoi(v); err == nil {
    44  			systemdActivationFDs = num
    45  			cmd.ExtraFiles = make([]*os.File, systemdActivationFDs)
    46  		}
    47  	}
    48  	for fd := 0; fd < systemdActivationFDs; fd++ {
    49  		cmd.ExtraFiles[fd] = os.NewFile(uintptr(firstExtraFD+fd), "")
    50  	}
    51  }
    52  
    53  func createCmd(opt Opt) (*exec.Cmd, error) {
    54  	fixListenPidEnv, err := strconv.ParseBool(os.Getenv(opt.ChildUseActivationEnvKey))
    55  	if err != nil {
    56  		fixListenPidEnv = false
    57  	}
    58  	os.Unsetenv(opt.ChildUseActivationEnvKey)
    59  	targetCmd := opt.TargetCmd
    60  	var cmd *exec.Cmd
    61  	cmdEnv := os.Environ()
    62  	if fixListenPidEnv {
    63  		cmd = exec.Command("/proc/self/exe", os.Args[1:]...)
    64  		cmdEnv = append(cmdEnv, opt.RunActivationHelperEnvKey+"=true")
    65  	} else {
    66  		var args []string
    67  		if len(targetCmd) > 1 {
    68  			args = targetCmd[1:]
    69  		}
    70  		cmd = exec.Command(targetCmd[0], args...)
    71  	}
    72  	cmd.Stdin = os.Stdin
    73  	cmd.Stdout = os.Stdout
    74  	cmd.Stderr = os.Stderr
    75  	cmd.Env = cmdEnv
    76  	cmd.SysProcAttr = &syscall.SysProcAttr{
    77  		Pdeathsig: syscall.SIGKILL,
    78  	}
    79  	setupFiles(cmd)
    80  	return cmd, nil
    81  }
    82  
    83  // mountSysfs is needed for mounting /sys/class/net
    84  // when netns is unshared.
    85  func mountSysfs(hostNetwork, evacuateCgroup2 bool) error {
    86  	const cgroupDir = "/sys/fs/cgroup"
    87  	if hostNetwork {
    88  		if evacuateCgroup2 {
    89  			// We need to mount tmpfs before cgroup2 to avoid EBUSY
    90  			if err := unix.Mount("none", cgroupDir, "tmpfs", 0, ""); err != nil {
    91  				return fmt.Errorf("failed to mount tmpfs on %s: %w", cgroupDir, err)
    92  			}
    93  			if err := unix.Mount("none", cgroupDir, "cgroup2", 0, ""); err != nil {
    94  				return fmt.Errorf("failed to mount cgroup2 on %s: %w", cgroupDir, err)
    95  			}
    96  		}
    97  		// NOP
    98  		return nil
    99  	}
   100  
   101  	tmp, err := os.MkdirTemp("/tmp", "rksys")
   102  	if err != nil {
   103  		return fmt.Errorf("creating a directory under /tmp: %w", err)
   104  	}
   105  	defer os.RemoveAll(tmp)
   106  	if !evacuateCgroup2 {
   107  		if err := unix.Mount(cgroupDir, tmp, "", uintptr(unix.MS_BIND|unix.MS_REC), ""); err != nil {
   108  			return fmt.Errorf("failed to create bind mount on %s: %w", cgroupDir, err)
   109  		}
   110  	}
   111  
   112  	if err := unix.Mount("none", "/sys", "sysfs", 0, ""); err != nil {
   113  		// when the sysfs in the parent namespace is RO,
   114  		// we can't mount RW sysfs even in the child namespace.
   115  		// https://github.com/rootless-containers/rootlesskit/pull/23#issuecomment-429292632
   116  		// https://github.com/torvalds/linux/blob/9f203e2f2f065cd74553e6474f0ae3675f39fb0f/fs/namespace.c#L3326-L3328
   117  		logrus.Warnf("failed to mount sysfs, falling back to read-only mount: %v", err)
   118  		if err := unix.Mount("none", "/sys", "sysfs", uintptr(unix.MS_RDONLY), ""); err != nil {
   119  			// when /sys/firmware is masked, even RO sysfs can't be mounted
   120  			logrus.Warnf("failed to mount sysfs: %v", err)
   121  		}
   122  	}
   123  	if evacuateCgroup2 {
   124  		if err := unix.Mount("none", cgroupDir, "cgroup2", 0, ""); err != nil {
   125  			return fmt.Errorf("failed to mount cgroup2 on %s: %w", cgroupDir, err)
   126  		}
   127  	} else {
   128  		if err := unix.Mount(tmp, cgroupDir, "", uintptr(unix.MS_MOVE), ""); err != nil {
   129  			return fmt.Errorf("failed to move mount point from %s to %s: %w", tmp, cgroupDir, err)
   130  		}
   131  	}
   132  	return nil
   133  }
   134  
   135  func mountProcfs() error {
   136  	if err := unix.Mount("none", "/proc", "proc", 0, ""); err != nil {
   137  		logrus.Warnf("failed to mount procfs, falling back to read-only mount: %v", err)
   138  		if err := unix.Mount("none", "/proc", "proc", uintptr(unix.MS_RDONLY), ""); err != nil {
   139  			logrus.Warnf("failed to mount procfs: %v", err)
   140  		}
   141  	}
   142  	return nil
   143  }
   144  
   145  func activateLoopback() error {
   146  	cmds := [][]string{
   147  		{"ip", "link", "set", "lo", "up"},
   148  	}
   149  	if err := common.Execs(os.Stderr, os.Environ(), cmds); err != nil {
   150  		return fmt.Errorf("executing %v: %w", cmds, err)
   151  	}
   152  	return nil
   153  }
   154  
   155  func activateDev(dev, ip string, netmask int, gateway string, mtu int) error {
   156  	cmds := [][]string{
   157  		{"ip", "link", "set", dev, "up"},
   158  		{"ip", "link", "set", "dev", dev, "mtu", strconv.Itoa(mtu)},
   159  		{"ip", "addr", "add", ip + "/" + strconv.Itoa(netmask), "dev", dev},
   160  		{"ip", "route", "add", "default", "via", gateway, "dev", dev},
   161  	}
   162  	if err := common.Execs(os.Stderr, os.Environ(), cmds); err != nil {
   163  		return fmt.Errorf("executing %v: %w", cmds, err)
   164  	}
   165  	return nil
   166  }
   167  
   168  func setupCopyDir(driver copyup.ChildDriver, dirs []string) (bool, error) {
   169  	if driver != nil {
   170  		etcWasCopied := false
   171  		copied, err := driver.CopyUp(dirs)
   172  		for _, d := range copied {
   173  			if d == "/etc" {
   174  				etcWasCopied = true
   175  				break
   176  			}
   177  		}
   178  		return etcWasCopied, err
   179  	}
   180  	if len(dirs) != 0 {
   181  		return false, errors.New("copy-up driver is not specified")
   182  	}
   183  	return false, nil
   184  }
   185  
   186  // setupNet sets up the network driver.
   187  //
   188  // NOTE: msg is altered during calling driver.ConfigureNetworkChild
   189  func setupNet(stateDir string, msg *messages.ParentInitNetworkDriverCompleted, etcWasCopied bool, driver network.ChildDriver, detachedNetNSPath string) error {
   190  	// HostNetwork
   191  	if driver == nil {
   192  		return nil
   193  	}
   194  
   195  	stateDirResolvConf := filepath.Join(stateDir, "resolv.conf")
   196  	hostsContent, err := generateEtcHosts()
   197  	if err != nil {
   198  		return err
   199  	}
   200  	stateDirHosts := filepath.Join(stateDir, "hosts")
   201  	if err := os.WriteFile(stateDirHosts, hostsContent, 0644); err != nil {
   202  		return fmt.Errorf("writing %s: %w", stateDirHosts, err)
   203  	}
   204  
   205  	if detachedNetNSPath == "" {
   206  		// non-detached mode
   207  		if err := activateLoopback(); err != nil {
   208  			return err
   209  		}
   210  		dev, err := driver.ConfigureNetworkChild(msg, detachedNetNSPath) // alters msg
   211  		if err != nil {
   212  			return err
   213  		}
   214  		if err := os.WriteFile(stateDirResolvConf, generateResolvConf(msg.DNS), 0644); err != nil {
   215  			return fmt.Errorf("writing %s: %w", stateDirResolvConf, err)
   216  		}
   217  		Info, _ := driver.ChildDriverInfo()
   218  		if !Info.ConfiguresInterface {
   219  			if err := activateDev(dev, msg.IP, msg.Netmask, msg.Gateway, msg.MTU); err != nil {
   220  				return err
   221  			}
   222  		}
   223  		if etcWasCopied {
   224  			// remove copied-up link
   225  			for _, f := range []string{"/etc/resolv.conf", "/etc/hosts"} {
   226  				if err := os.RemoveAll(f); err != nil {
   227  					return fmt.Errorf("failed to remove copied-up link %q: %w", f, err)
   228  				}
   229  				if err := os.WriteFile(f, []byte{}, 0644); err != nil {
   230  					return fmt.Errorf("writing %s: %w", f, err)
   231  				}
   232  			}
   233  		} else {
   234  			logrus.Warn("Mounting /etc/resolv.conf without copying-up /etc. " +
   235  				"Note that /etc/resolv.conf in the namespace will be unmounted when it is recreated on the host. " +
   236  				"Unless /etc/resolv.conf is statically configured, copying-up /etc is highly recommended. " +
   237  				"Please refer to RootlessKit documentation for further information.")
   238  		}
   239  		if err := unix.Mount(stateDirResolvConf, "/etc/resolv.conf", "", uintptr(unix.MS_BIND), ""); err != nil {
   240  			return fmt.Errorf("failed to create bind mount /etc/resolv.conf for %s: %w", stateDirResolvConf, err)
   241  		}
   242  		if err := unix.Mount(stateDirHosts, "/etc/hosts", "", uintptr(unix.MS_BIND), ""); err != nil {
   243  			return fmt.Errorf("failed to create bind mount /etc/hosts for %s: %w", stateDirHosts, err)
   244  		}
   245  	} else {
   246  		// detached mode
   247  		if err := ns.WithNetNSPath(detachedNetNSPath, func(_ ns.NetNS) error {
   248  			return activateLoopback()
   249  		}); err != nil {
   250  			return err
   251  		}
   252  		dev, err := driver.ConfigureNetworkChild(msg, detachedNetNSPath) // alters msg
   253  		if err != nil {
   254  			return err
   255  		}
   256  		if err := os.WriteFile(stateDirResolvConf, generateResolvConf(msg.DNS), 0644); err != nil {
   257  			return fmt.Errorf("writing %s: %w", stateDirResolvConf, err)
   258  		}
   259  		if err := ns.WithNetNSPath(detachedNetNSPath, func(_ ns.NetNS) error {
   260  			Info, _ := driver.ChildDriverInfo()
   261  			if !Info.ConfiguresInterface {
   262  				return activateDev(dev, msg.IP, msg.Netmask, msg.Gateway, msg.MTU)
   263  			}
   264  			return nil
   265  		}); err != nil {
   266  			return err
   267  		}
   268  	}
   269  	return nil
   270  }
   271  
// Opt holds the configuration consumed by Child.
type Opt struct {
	PipeFDEnvKey              string              // needs to be set
	RunActivationHelperEnvKey string              // needs to be set
	ChildUseActivationEnvKey  string              // needs to be set
	StateDirEnvKey            string              // needs to be set (Child falls back to "ROOTLESSKIT_STATE_DIR" when empty)
	TargetCmd                 []string            // needs to be set
	NetworkDriver             network.ChildDriver // nil for HostNetwork
	CopyUpDriver              copyup.ChildDriver  // cannot be nil if len(CopyUpDirs) != 0
	// CopyUpDirs is the list of directories handed to CopyUpDriver.CopyUp.
	CopyUpDirs                []string
	// DetachNetNS makes Child create a detached network namespace at
	// "netns" under the state directory.
	DetachNetNS               bool
	// PortDriver, when non-nil, is run by Child in a goroutine for the
	// lifetime of the target command.
	PortDriver                port.ChildDriver
	MountProcfs               bool   // needs to be set if (and only if) parent.Opt.CreatePIDNS is set
	Propagation               string // mount propagation type
	// Reaper makes Child run the target command through runAndReap instead
	// of a plain Start/Wait.
	Reaper                    bool
	EvacuateCgroup2           bool // needs to correspond to parent.Opt.EvacuateCgroup2
}
   288  
   289  // statPIDNS is from https://github.com/containerd/containerd/blob/v1.7.2/services/introspection/pidns_linux.go#L25-L36
   290  func statPIDNS(pid int) (uint64, error) {
   291  	f := fmt.Sprintf("/proc/%d/ns/pid", pid)
   292  	st, err := os.Stat(f)
   293  	if err != nil {
   294  		return 0, err
   295  	}
   296  	stSys, ok := st.Sys().(*syscall.Stat_t)
   297  	if !ok {
   298  		return 0, fmt.Errorf("%T is not *syscall.Stat_t", st.Sys())
   299  	}
   300  	return stSys.Ino, nil
   301  }
   302  
   303  func hasCaps() (bool, error) {
   304  	pid := os.Getpid()
   305  	hdr := unix.CapUserHeader{
   306  		Version: unix.LINUX_CAPABILITY_VERSION_3,
   307  		Pid:     int32(pid),
   308  	}
   309  	var data unix.CapUserData
   310  	if err := unix.Capget(&hdr, &data); err != nil {
   311  		return false, fmt.Errorf("failed to get the current caps: %w", err)
   312  	}
   313  	logrus.Debugf("Capabilities: %+v", data)
   314  	return data.Effective != 0, nil
   315  }
   316  
   317  // gainCaps gains the caps inside the user namespace.
   318  // The caps are gained on re-execution after the child's uid_map and gid_map are fully written.
   319  func gainCaps() error {
   320  	pid := os.Getpid()
   321  	pidns, err := statPIDNS(pid)
   322  	if err != nil {
   323  		logrus.WithError(err).Debug("Failed to stat pidns (negligible when unsharing pidns)")
   324  		pidns = 0
   325  	}
   326  	envName := fmt.Sprintf("_ROOTLESSKIT_REEXEC_COUNT_%d_%d", pidns, pid)
   327  	logrus.Debugf("Re-executing the RootlessKit child process (PID=%d) to gain the caps", pid)
   328  
   329  	var envValueInt int
   330  	if envValueStr := os.Getenv(envName); envValueStr != "" {
   331  		var err error
   332  		envValueInt, err = strconv.Atoi(envValueStr)
   333  		if err != nil {
   334  			return fmt.Errorf("failed to parse %s value %q: %w", envName, envValueStr, err)
   335  		}
   336  	}
   337  	if envValueInt > 5 {
   338  		time.Sleep(10 * time.Millisecond * time.Duration(envValueInt))
   339  	}
   340  	if envValueInt > 10 {
   341  		return fmt.Errorf("no capabilities was gained after reexecuting the child (%s=%d)", envName, envValueInt)
   342  	}
   343  	logrus.Debugf("%s: %d->%d", envName, envValueInt, envValueInt+1)
   344  	os.Setenv(envName, strconv.Itoa(envValueInt+1))
   345  
   346  	// PID should be kept after re-execution.
   347  	if err := syscall.Exec("/proc/self/exe", os.Args, os.Environ()); err != nil {
   348  		return err
   349  	}
   350  	panic("should not reach here")
   351  }
   352  
   353  func Child(opt Opt) error {
   354  	if opt.PipeFDEnvKey == "" {
   355  		return errors.New("pipe FD env key is not set")
   356  	}
   357  	pipeFDStr := os.Getenv(opt.PipeFDEnvKey)
   358  	if pipeFDStr == "" {
   359  		return fmt.Errorf("%s is not set", opt.PipeFDEnvKey)
   360  	}
   361  	var pipeFD, pipe2FD int
   362  	if _, err := fmt.Sscanf(pipeFDStr, "%d,%d", &pipeFD, &pipe2FD); err != nil {
   363  		return fmt.Errorf("unexpected fd value: %s: %w", pipeFDStr, err)
   364  	}
   365  	logrus.Debugf("pipeFD=%d, pipe2FD=%d", pipeFD, pipe2FD)
   366  	pipeR := os.NewFile(uintptr(pipeFD), "")
   367  	pipe2W := os.NewFile(uintptr(pipe2FD), "")
   368  
   369  	if opt.StateDirEnvKey == "" {
   370  		opt.StateDirEnvKey = "ROOTLESSKIT_STATE_DIR" // for backward compatibility of Go API
   371  	}
   372  	stateDir := os.Getenv(opt.StateDirEnvKey)
   373  	if stateDir == "" {
   374  		return errors.New("got empty StateDir")
   375  	}
   376  
   377  	var (
   378  		msg *messages.Message
   379  		err error
   380  	)
   381  	if ok, err := hasCaps(); err != nil {
   382  		return err
   383  	} else if !ok {
   384  		msg, err = messages.WaitFor(pipeR, messages.Name(messages.ParentHello{}))
   385  		if err != nil {
   386  			return err
   387  		}
   388  
   389  		msgChildHello := &messages.Message{
   390  			U: messages.U{
   391  				ChildHello: &messages.ChildHello{},
   392  			},
   393  		}
   394  		if err := messages.Send(pipe2W, msgChildHello); err != nil {
   395  			return err
   396  		}
   397  
   398  		msg, err = messages.WaitFor(pipeR, messages.Name(messages.ParentInitIdmapCompleted{}))
   399  		if err != nil {
   400  			return err
   401  		}
   402  
   403  		if err := gainCaps(); err != nil {
   404  			return fmt.Errorf("failed to gain the caps inside the user namespace: %w", err)
   405  		}
   406  	}
   407  
   408  	if opt.MountProcfs {
   409  		if err := mountProcfs(); err != nil {
   410  			return err
   411  		}
   412  	}
   413  
   414  	var detachedNetNSPath string
   415  	if opt.DetachNetNS {
   416  		detachedNetNSPath = filepath.Join(stateDir, "netns")
   417  		if err = NewNetNsWithPathWithoutEnter(detachedNetNSPath); err != nil {
   418  			return fmt.Errorf("failed to create a detached netns on %q: %w", detachedNetNSPath, err)
   419  		}
   420  	}
   421  
   422  	msgChildInitUserNSCompleted := &messages.Message{
   423  		U: messages.U{
   424  			ChildInitUserNSCompleted: &messages.ChildInitUserNSCompleted{},
   425  		},
   426  	}
   427  	if err := messages.Send(pipe2W, msgChildInitUserNSCompleted); err != nil {
   428  		return err
   429  	}
   430  
   431  	msg, err = messages.WaitFor(pipeR, messages.Name(messages.ParentInitNetworkDriverCompleted{}))
   432  	if err != nil {
   433  		return err
   434  	}
   435  	netMsg := msg.U.ParentInitNetworkDriverCompleted
   436  
   437  	msg, err = messages.WaitFor(pipeR, messages.Name(messages.ParentInitPortDriverCompleted{}))
   438  	if err != nil {
   439  		return err
   440  	}
   441  	portMsg := msg.U.ParentInitPortDriverCompleted
   442  
   443  	// The parent calls child with Pdeathsig, but it is cleared when newuidmap SUID binary is called
   444  	// https://github.com/rootless-containers/rootlesskit/issues/65#issuecomment-492343646
   445  	runtime.LockOSThread()
   446  	err = unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0)
   447  	runtime.UnlockOSThread()
   448  	if err != nil {
   449  		return err
   450  	}
   451  	os.Unsetenv(opt.PipeFDEnvKey)
   452  	if err := pipeR.Close(); err != nil {
   453  		return fmt.Errorf("failed to close fd %d: %w", pipeFD, err)
   454  	}
   455  	if err := setMountPropagation(opt.Propagation); err != nil {
   456  		return err
   457  	}
   458  	etcWasCopied, err := setupCopyDir(opt.CopyUpDriver, opt.CopyUpDirs)
   459  	if err != nil {
   460  		return err
   461  	}
   462  	if detachedNetNSPath == "" {
   463  		if err := mountSysfs(opt.NetworkDriver == nil, opt.EvacuateCgroup2); err != nil {
   464  			return err
   465  		}
   466  	}
   467  	if err := setupNet(stateDir, netMsg, etcWasCopied, opt.NetworkDriver, detachedNetNSPath); err != nil {
   468  		return err
   469  	}
   470  	portQuitCh := make(chan struct{})
   471  	portErrCh := make(chan error)
   472  	if opt.PortDriver != nil {
   473  		var portDriverOpaque map[string]string
   474  		if portMsg != nil {
   475  			portDriverOpaque = portMsg.PortDriverOpaque
   476  		}
   477  		go func() {
   478  			portErrCh <- opt.PortDriver.RunChildDriver(portDriverOpaque, portQuitCh, detachedNetNSPath)
   479  		}()
   480  	}
   481  
   482  	cmd, err := createCmd(opt)
   483  	if err != nil {
   484  		return err
   485  	}
   486  	if opt.Reaper {
   487  		if err := runAndReap(cmd); err != nil {
   488  			return fmt.Errorf("command %v exited: %w", opt.TargetCmd, err)
   489  		}
   490  	} else {
   491  		if err := cmd.Start(); err != nil {
   492  			return fmt.Errorf("command %v exited: %w", opt.TargetCmd, err)
   493  		}
   494  		sigc := sigproxy.ForwardAllSignals(context.TODO(), cmd.Process.Pid)
   495  		defer sigproxysignal.StopCatch(sigc)
   496  		if err := cmd.Wait(); err != nil {
   497  			return fmt.Errorf("command %v exited: %w", opt.TargetCmd, err)
   498  		}
   499  	}
   500  	if opt.PortDriver != nil {
   501  		portQuitCh <- struct{}{}
   502  		return <-portErrCh
   503  	}
   504  	return nil
   505  }
   506  
   507  func setMountPropagation(propagation string) error {
   508  	flags, ok := propagationStates[propagation]
   509  	if ok {
   510  		if err := unix.Mount("none", "/", "", flags, ""); err != nil {
   511  			return fmt.Errorf("failed to share mount point: /: %w", err)
   512  		}
   513  	}
   514  	return nil
   515  }
   516  
   517  func runAndReap(cmd *exec.Cmd) error {
   518  	c := make(chan os.Signal, 32)
   519  	signal.Notify(c, syscall.SIGCHLD)
   520  	cmd.SysProcAttr.Setsid = true
   521  	if err := cmd.Start(); err != nil {
   522  		return err
   523  	}
   524  	sigc := sigproxy.ForwardAllSignals(context.TODO(), cmd.Process.Pid)
   525  	defer sigproxysignal.StopCatch(sigc)
   526  
   527  	result := make(chan error)
   528  	go func() {
   529  		defer close(result)
   530  		for cEntry := range c {
   531  			logrus.Debugf("reaper: got signal %q", cEntry)
   532  			if wsPtr := reap(cmd.Process.Pid); wsPtr != nil {
   533  				ws := *wsPtr
   534  				if ws.Exited() && ws.ExitStatus() == 0 {
   535  					result <- nil
   536  					continue
   537  				}
   538  				var resultErr common.ErrorWithSys = &reaperErr{
   539  					ws: ws,
   540  				}
   541  				result <- resultErr
   542  			}
   543  		}
   544  	}()
   545  	return <-result
   546  }
   547  
   548  func reap(myPid int) *syscall.WaitStatus {
   549  	var res *syscall.WaitStatus
   550  	for {
   551  		var ws syscall.WaitStatus
   552  		pid, err := syscall.Wait4(-1, &ws, syscall.WNOHANG, nil)
   553  		logrus.Debugf("reaper: got ws=%+v, pid=%d, err=%+v", ws, pid, err)
   554  		if err != nil || pid <= 0 {
   555  			break
   556  		}
   557  		if pid == myPid {
   558  			res = &ws
   559  		}
   560  	}
   561  	return res
   562  }
   563  
   564  type reaperErr struct {
   565  	ws syscall.WaitStatus
   566  }
   567  
   568  func (e *reaperErr) Sys() interface{} {
   569  	return e.ws
   570  }
   571  
   572  func (e *reaperErr) Error() string {
   573  	if e.ws.Exited() {
   574  		return fmt.Sprintf("exit status %d", e.ws.ExitStatus())
   575  	}
   576  	if e.ws.Signaled() {
   577  		return fmt.Sprintf("signal: %s", e.ws.Signal())
   578  	}
   579  	return fmt.Sprintf("exited with WAITSTATUS=0x%08x", e.ws)
   580  }
   581  
   582  func NewNetNsWithPathWithoutEnter(p string) error {
   583  	if err := os.WriteFile(p, nil, 0400); err != nil {
   584  		return err
   585  	}
   586  	selfExe, err := os.Executable()
   587  	if err != nil {
   588  		return err
   589  	}
   590  	// this is hard (not impossible though) to reimplement in Go: https://github.com/cloudflare/slirpnetstack/commit/d7766a8a77f0093d3cb7a94bd0ccbe3f67d411ba
   591  	cmd := exec.Command("unshare", "-n", "mount", "--bind", "/proc/self/ns/net", p)
   592  	// Use our own implementation of unshare that is embedded in RootlessKit, so as to
   593  	// avoid /etc/apparmor.d/unshare-userns-restrict on Ubuntu 25.04.
   594  	// https://github.com/rootless-containers/rootlesskit/issues/494
   595  	cmd.Path = selfExe
   596  	out, err := cmd.CombinedOutput()
   597  	if err != nil {
   598  		return fmt.Errorf("failed to execute %v: %w (out=%q)", cmd.Args, err, string(out))
   599  	}
   600  	return nil
   601  }