github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/userns/usernsfd_linux.go (about) 1 package userns 2 3 import ( 4 "fmt" 5 "os" 6 "sort" 7 "strings" 8 "sync" 9 "syscall" 10 11 "github.com/sirupsen/logrus" 12 "golang.org/x/sys/unix" 13 14 "github.com/opencontainers/runc/libcontainer/configs" 15 ) 16 17 type Mapping struct { 18 UIDMappings []configs.IDMap 19 GIDMappings []configs.IDMap 20 } 21 22 func (m Mapping) toSys() (uids, gids []syscall.SysProcIDMap) { 23 for _, uid := range m.UIDMappings { 24 uids = append(uids, syscall.SysProcIDMap{ 25 ContainerID: int(uid.ContainerID), 26 HostID: int(uid.HostID), 27 Size: int(uid.Size), 28 }) 29 } 30 for _, gid := range m.GIDMappings { 31 gids = append(gids, syscall.SysProcIDMap{ 32 ContainerID: int(gid.ContainerID), 33 HostID: int(gid.HostID), 34 Size: int(gid.Size), 35 }) 36 } 37 return 38 } 39 40 // id returns a unique identifier for this mapping, agnostic of the order of 41 // the uid and gid mappings (because the order doesn't matter to the kernel). 42 // The set of userns handles is indexed using this ID. 43 func (m Mapping) id() string { 44 var uids, gids []string 45 for _, idmap := range m.UIDMappings { 46 uids = append(uids, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size)) 47 } 48 for _, idmap := range m.GIDMappings { 49 gids = append(gids, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size)) 50 } 51 // We don't care about the sort order -- just sort them. 52 sort.Strings(uids) 53 sort.Strings(gids) 54 return "uid=" + strings.Join(uids, ",") + ";gid=" + strings.Join(gids, ",") 55 } 56 57 type Handles struct { 58 m sync.Mutex 59 maps map[string]*os.File 60 } 61 62 // Release all resources associated with this Handle. All existing files 63 // returned from Get() will continue to work even after calling Release(). The 64 // same Handles can be re-used after calling Release(). 65 func (hs *Handles) Release() { 66 hs.m.Lock() 67 defer hs.m.Unlock() 68 69 // Close the files for good measure, though GC will do that for us anyway. 70 for _, file := range hs.maps { 71 _ = file.Close() 72 } 73 hs.maps = nil 74 } 75 76 func spawnProc(req Mapping) (*os.Process, error) { 77 // We need to spawn a subprocess with the requested mappings, which is 78 // unfortunately quite expensive. The "safe" way of doing this is natively 79 // with Go (and then spawning something like "sleep infinity"), but 80 // execve() is a waste of cycles because we just need some process to have 81 // the right mapping, we don't care what it's executing. The "unsafe" 82 // option of doing a clone() behind the back of Go is probably okay in 83 // theory as long as we just do kill(getpid(), SIGSTOP). However, if we 84 // tell Go to put the new process into PTRACE_TRACEME mode, we can avoid 85 // the exec and not have to faff around with the mappings. 86 // 87 // Note that Go's stdlib does not support newuidmap, but in the case of 88 // id-mapped mounts, it seems incredibly unlikely that the user will be 89 // requesting us to do a remapping as an unprivileged user with mappings 90 // they have privileges over. 91 logrus.Debugf("spawning dummy process for id-mapping %s", req.id()) 92 uidMappings, gidMappings := req.toSys() 93 // We don't need to use /proc/thread-self here because the exe mm of a 94 // thread-group is guaranteed to be the same for all threads by definition. 95 // This lets us avoid having to do runtime.LockOSThread. 96 return os.StartProcess("/proc/self/exe", []string{"runc", "--help"}, &os.ProcAttr{ 97 Sys: &syscall.SysProcAttr{ 98 Cloneflags: unix.CLONE_NEWUSER, 99 UidMappings: uidMappings, 100 GidMappings: gidMappings, 101 GidMappingsEnableSetgroups: false, 102 // Put the process into PTRACE_TRACEME mode to allow us to get the 103 // userns without having a proper execve() target. 104 Ptrace: true, 105 }, 106 }) 107 } 108 109 func dupFile(f *os.File) (*os.File, error) { 110 newFd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0) 111 if err != nil { 112 return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err) 113 } 114 return os.NewFile(uintptr(newFd), f.Name()), nil 115 } 116 117 // Get returns a handle to a /proc/$pid/ns/user nsfs file with the requested 118 // mapping. The processes spawned to produce userns nsfds are cached, so if 119 // equivalent user namespace mappings are requested, the same user namespace 120 // will be returned. The caller is responsible for closing the returned file 121 // descriptor. 122 func (hs *Handles) Get(req Mapping) (file *os.File, err error) { 123 hs.m.Lock() 124 defer hs.m.Unlock() 125 126 if hs.maps == nil { 127 hs.maps = make(map[string]*os.File) 128 } 129 130 file, ok := hs.maps[req.id()] 131 if !ok { 132 proc, err := spawnProc(req) 133 if err != nil { 134 return nil, fmt.Errorf("failed to spawn dummy process for map %s: %w", req.id(), err) 135 } 136 // Make sure we kill the helper process. We ignore errors because 137 // there's not much we can do about them anyway, and ultimately 138 defer func() { 139 _ = proc.Kill() 140 _, _ = proc.Wait() 141 }() 142 143 // Stash away a handle to the userns file. This is neater than keeping 144 // the process alive, because Go's GC can handle files much better than 145 // leaked processes, and having long-living useless processes seems 146 // less than ideal. 147 file, err = os.Open(fmt.Sprintf("/proc/%d/ns/user", proc.Pid)) 148 if err != nil { 149 return nil, err 150 } 151 hs.maps[req.id()] = file 152 } 153 // Duplicate the file, to make sure the lifecycle of each *os.File we 154 // return is independent. 155 return dupFile(file) 156 }