github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/userns/usernsfd_linux.go (about)

     1  package userns
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"sort"
     7  	"strings"
     8  	"sync"
     9  	"syscall"
    10  
    11  	"github.com/sirupsen/logrus"
    12  	"golang.org/x/sys/unix"
    13  
    14  	"github.com/opencontainers/runc/libcontainer/configs"
    15  )
    16  
// Mapping is a set of user and group ID mappings describing a user
// namespace. It is the request type passed to Handles.Get.
type Mapping struct {
	// UIDMappings holds the user ID mappings.
	UIDMappings []configs.IDMap
	// GIDMappings holds the group ID mappings.
	GIDMappings []configs.IDMap
}
    21  
    22  func (m Mapping) toSys() (uids, gids []syscall.SysProcIDMap) {
    23  	for _, uid := range m.UIDMappings {
    24  		uids = append(uids, syscall.SysProcIDMap{
    25  			ContainerID: int(uid.ContainerID),
    26  			HostID:      int(uid.HostID),
    27  			Size:        int(uid.Size),
    28  		})
    29  	}
    30  	for _, gid := range m.GIDMappings {
    31  		gids = append(gids, syscall.SysProcIDMap{
    32  			ContainerID: int(gid.ContainerID),
    33  			HostID:      int(gid.HostID),
    34  			Size:        int(gid.Size),
    35  		})
    36  	}
    37  	return
    38  }
    39  
    40  // id returns a unique identifier for this mapping, agnostic of the order of
    41  // the uid and gid mappings (because the order doesn't matter to the kernel).
    42  // The set of userns handles is indexed using this ID.
    43  func (m Mapping) id() string {
    44  	var uids, gids []string
    45  	for _, idmap := range m.UIDMappings {
    46  		uids = append(uids, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size))
    47  	}
    48  	for _, idmap := range m.GIDMappings {
    49  		gids = append(gids, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size))
    50  	}
    51  	// We don't care about the sort order -- just sort them.
    52  	sort.Strings(uids)
    53  	sort.Strings(gids)
    54  	return "uid=" + strings.Join(uids, ",") + ";gid=" + strings.Join(gids, ",")
    55  }
    56  
// Handles is a cache of user namespace file handles, indexed by the id() of
// the Mapping they were created for. The zero value is ready to use: Get
// allocates the underlying map on demand.
type Handles struct {
	// m serialises access to maps in Get and Release.
	m sync.Mutex
	// maps holds the cached /proc/$pid/ns/user files, keyed by Mapping.id().
	maps map[string]*os.File
}
    61  
    62  // Release all resources associated with this Handle. All existing files
    63  // returned from Get() will continue to work even after calling Release(). The
    64  // same Handles can be re-used after calling Release().
    65  func (hs *Handles) Release() {
    66  	hs.m.Lock()
    67  	defer hs.m.Unlock()
    68  
    69  	// Close the files for good measure, though GC will do that for us anyway.
    70  	for _, file := range hs.maps {
    71  		_ = file.Close()
    72  	}
    73  	hs.maps = nil
    74  }
    75  
    76  func spawnProc(req Mapping) (*os.Process, error) {
    77  	// We need to spawn a subprocess with the requested mappings, which is
    78  	// unfortunately quite expensive. The "safe" way of doing this is natively
    79  	// with Go (and then spawning something like "sleep infinity"), but
    80  	// execve() is a waste of cycles because we just need some process to have
    81  	// the right mapping, we don't care what it's executing. The "unsafe"
    82  	// option of doing a clone() behind the back of Go is probably okay in
    83  	// theory as long as we just do kill(getpid(), SIGSTOP). However, if we
    84  	// tell Go to put the new process into PTRACE_TRACEME mode, we can avoid
    85  	// the exec and not have to faff around with the mappings.
    86  	//
    87  	// Note that Go's stdlib does not support newuidmap, but in the case of
    88  	// id-mapped mounts, it seems incredibly unlikely that the user will be
    89  	// requesting us to do a remapping as an unprivileged user with mappings
    90  	// they have privileges over.
    91  	logrus.Debugf("spawning dummy process for id-mapping %s", req.id())
    92  	uidMappings, gidMappings := req.toSys()
    93  	// We don't need to use /proc/thread-self here because the exe mm of a
    94  	// thread-group is guaranteed to be the same for all threads by definition.
    95  	// This lets us avoid having to do runtime.LockOSThread.
    96  	return os.StartProcess("/proc/self/exe", []string{"runc", "--help"}, &os.ProcAttr{
    97  		Sys: &syscall.SysProcAttr{
    98  			Cloneflags:                 unix.CLONE_NEWUSER,
    99  			UidMappings:                uidMappings,
   100  			GidMappings:                gidMappings,
   101  			GidMappingsEnableSetgroups: false,
   102  			// Put the process into PTRACE_TRACEME mode to allow us to get the
   103  			// userns without having a proper execve() target.
   104  			Ptrace: true,
   105  		},
   106  	})
   107  }
   108  
   109  func dupFile(f *os.File) (*os.File, error) {
   110  	newFd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0)
   111  	if err != nil {
   112  		return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err)
   113  	}
   114  	return os.NewFile(uintptr(newFd), f.Name()), nil
   115  }
   116  
   117  // Get returns a handle to a /proc/$pid/ns/user nsfs file with the requested
   118  // mapping. The processes spawned to produce userns nsfds are cached, so if
   119  // equivalent user namespace mappings are requested, the same user namespace
   120  // will be returned. The caller is responsible for closing the returned file
   121  // descriptor.
   122  func (hs *Handles) Get(req Mapping) (file *os.File, err error) {
   123  	hs.m.Lock()
   124  	defer hs.m.Unlock()
   125  
   126  	if hs.maps == nil {
   127  		hs.maps = make(map[string]*os.File)
   128  	}
   129  
   130  	file, ok := hs.maps[req.id()]
   131  	if !ok {
   132  		proc, err := spawnProc(req)
   133  		if err != nil {
   134  			return nil, fmt.Errorf("failed to spawn dummy process for map %s: %w", req.id(), err)
   135  		}
   136  		// Make sure we kill the helper process. We ignore errors because
   137  		// there's not much we can do about them anyway, and ultimately
   138  		defer func() {
   139  			_ = proc.Kill()
   140  			_, _ = proc.Wait()
   141  		}()
   142  
   143  		// Stash away a handle to the userns file. This is neater than keeping
   144  		// the process alive, because Go's GC can handle files much better than
   145  		// leaked processes, and having long-living useless processes seems
   146  		// less than ideal.
   147  		file, err = os.Open(fmt.Sprintf("/proc/%d/ns/user", proc.Pid))
   148  		if err != nil {
   149  			return nil, err
   150  		}
   151  		hs.maps[req.id()] = file
   152  	}
   153  	// Duplicate the file, to make sure the lifecycle of each *os.File we
   154  	// return is independent.
   155  	return dupFile(file)
   156  }