// gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/fsimpl/proc/task.go

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package proc

import (
	"bytes"
	"fmt"
	"sort"

	"gvisor.dev/gvisor/pkg/abi/linux"
	"gvisor.dev/gvisor/pkg/context"
	"gvisor.dev/gvisor/pkg/errors/linuxerr"
	"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
	"gvisor.dev/gvisor/pkg/sentry/kernel"
	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
	"gvisor.dev/gvisor/pkg/sentry/mm"
	"gvisor.dev/gvisor/pkg/sentry/vfs"
)

// taskInode represents the inode for /proc/PID/ directory.
32 // 33 // +stateify savable 34 type taskInode struct { 35 implStatFS 36 kernfs.InodeAttrs 37 kernfs.InodeDirectoryNoNewChildren 38 kernfs.InodeNotAnonymous 39 kernfs.InodeNotSymlink 40 kernfs.InodeTemporary 41 kernfs.InodeWatches 42 kernfs.OrderedChildren 43 taskInodeRefs 44 45 locks vfs.FileLocks 46 47 task *kernel.Task 48 } 49 50 var _ kernfs.Inode = (*taskInode)(nil) 51 52 func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, fakeCgroupControllers map[string]string) (kernfs.Inode, error) { 53 if task.ExitState() == kernel.TaskExitDead { 54 return nil, linuxerr.ESRCH 55 } 56 57 contents := map[string]kernfs.Inode{ 58 "auxv": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &auxvData{task: task}), 59 "cmdline": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &metadataData{task: task, metaType: Cmdline}), 60 "comm": fs.newComm(ctx, task, fs.NextIno(), 0644), 61 "cwd": fs.newCwdSymlink(ctx, task, fs.NextIno()), 62 "environ": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &metadataData{task: task, metaType: Environ}), 63 "exe": fs.newExeSymlink(ctx, task, fs.NextIno()), 64 "fd": fs.newFDDirInode(ctx, task), 65 "fdinfo": fs.newFDInfoDirInode(ctx, task), 66 "gid_map": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0644, &idMapData{task: task, gids: true}), 67 "io": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0400, newIO(task, isThreadGroup)), 68 "limits": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &limitsData{task: task}), 69 "maps": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &mapsData{task: task}), 70 "mem": fs.newMemInode(ctx, task, fs.NextIno(), 0400), 71 "mountinfo": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &mountInfoData{fs: fs, task: task}), 72 "mounts": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &mountsData{fs: fs, task: task}), 73 "net": fs.newTaskNetDir(ctx, task), 74 "ns": fs.newTaskOwnedDir(ctx, task, fs.NextIno(), 0511, 
map[string]kernfs.Inode{ 75 "net": fs.newNamespaceSymlink(ctx, task, fs.NextIno(), linux.CLONE_NEWNET), 76 "mnt": fs.newNamespaceSymlink(ctx, task, fs.NextIno(), linux.CLONE_NEWNS), 77 "pid": fs.newPIDNamespaceSymlink(ctx, task, fs.NextIno()), 78 "user": fs.newFakeNamespaceSymlink(ctx, task, fs.NextIno(), "user"), 79 "ipc": fs.newNamespaceSymlink(ctx, task, fs.NextIno(), linux.CLONE_NEWIPC), 80 "uts": fs.newNamespaceSymlink(ctx, task, fs.NextIno(), linux.CLONE_NEWUTS), 81 }), 82 "oom_score": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, newStaticFile("0\n")), 83 "oom_score_adj": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0644, &oomScoreAdj{task: task}), 84 "root": fs.newRootSymlink(ctx, task, fs.NextIno()), 85 "smaps": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &smapsData{task: task}), 86 "stat": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}), 87 "statm": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &statmData{task: task}), 88 "status": fs.newStatusInode(ctx, task, pidns, fs.NextIno(), 0444), 89 "uid_map": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0644, &idMapData{task: task, gids: false}), 90 } 91 if isThreadGroup { 92 contents["task"] = fs.newSubtasks(ctx, task, pidns, fakeCgroupControllers) 93 } else { 94 contents["children"] = fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0644, &childrenData{task: task, pidns: pidns}) 95 } 96 if len(fakeCgroupControllers) > 0 { 97 contents["cgroup"] = fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, newFakeCgroupData(fakeCgroupControllers)) 98 } else { 99 contents["cgroup"] = fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &taskCgroupData{task: task}) 100 } 101 102 taskInode := &taskInode{task: task} 103 // Note: credentials are overridden by taskOwnedInode. 
104 taskInode.InodeAttrs.Init(ctx, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555) 105 taskInode.InitRefs() 106 107 inode := &taskOwnedInode{Inode: taskInode, owner: task} 108 109 taskInode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) 110 links := taskInode.OrderedChildren.Populate(contents) 111 taskInode.IncLinks(links) 112 113 return inode, nil 114 } 115 116 // Valid implements kernfs.Inode.Valid. This inode remains valid as long 117 // as the task is still running. When it's dead, another tasks with the same 118 // PID could replace it. 119 func (i *taskInode) Valid(ctx context.Context, parent *kernfs.Dentry, name string) bool { 120 return i.task.ExitState() != kernel.TaskExitDead 121 } 122 123 // Open implements kernfs.Inode.Open. 124 func (i *taskInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { 125 fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), d, &i.OrderedChildren, &i.locks, &opts, kernfs.GenericDirectoryFDOptions{ 126 SeekEnd: kernfs.SeekEndZero, 127 }) 128 if err != nil { 129 return nil, err 130 } 131 return fd.VFSFileDescription(), nil 132 } 133 134 // SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed. 135 func (*taskInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { 136 return linuxerr.EPERM 137 } 138 139 // DecRef implements kernfs.Inode.DecRef. 140 func (i *taskInode) DecRef(ctx context.Context) { 141 i.taskInodeRefs.DecRef(func() { i.Destroy(ctx) }) 142 } 143 144 // taskOwnedInode implements kernfs.Inode and overrides inode owner with task 145 // effective user and group. 146 // 147 // +stateify savable 148 type taskOwnedInode struct { 149 kernfs.Inode 150 151 // owner is the task that owns this inode. 
152 owner *kernel.Task 153 } 154 155 var _ kernfs.Inode = (*taskOwnedInode)(nil) 156 157 func (fs *filesystem) newTaskOwnedInode(ctx context.Context, task *kernel.Task, ino uint64, perm linux.FileMode, inode dynamicInode) kernfs.Inode { 158 // Note: credentials are overridden by taskOwnedInode. 159 inode.Init(ctx, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, inode, perm) 160 161 return &taskOwnedInode{Inode: inode, owner: task} 162 } 163 164 func (fs *filesystem) newTaskOwnedDir(ctx context.Context, task *kernel.Task, ino uint64, perm linux.FileMode, children map[string]kernfs.Inode) kernfs.Inode { 165 // Note: credentials are overridden by taskOwnedInode. 166 fdOpts := kernfs.GenericDirectoryFDOptions{SeekEnd: kernfs.SeekEndZero} 167 dir := kernfs.NewStaticDir(ctx, task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, perm, children, fdOpts) 168 169 return &taskOwnedInode{Inode: dir, owner: task} 170 } 171 172 func (i *taskOwnedInode) Valid(ctx context.Context, parent *kernfs.Dentry, name string) bool { 173 return i.owner.ExitState() != kernel.TaskExitDead && i.Inode.Valid(ctx, parent, name) 174 } 175 176 // Stat implements kernfs.Inode.Stat. 177 func (i *taskOwnedInode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { 178 stat, err := i.Inode.Stat(ctx, fs, opts) 179 if err != nil { 180 return linux.Statx{}, err 181 } 182 if opts.Mask&(linux.STATX_UID|linux.STATX_GID) != 0 { 183 uid, gid := i.getOwner(linux.FileMode(stat.Mode)) 184 if opts.Mask&linux.STATX_UID != 0 { 185 stat.UID = uint32(uid) 186 } 187 if opts.Mask&linux.STATX_GID != 0 { 188 stat.GID = uint32(gid) 189 } 190 } 191 return stat, nil 192 } 193 194 // CheckPermissions implements kernfs.Inode.CheckPermissions. 
195 func (i *taskOwnedInode) CheckPermissions(_ context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error { 196 mode := i.Mode() 197 uid, gid := i.getOwner(mode) 198 return vfs.GenericCheckPermissions(creds, ats, mode, uid, gid) 199 } 200 201 func (i *taskOwnedInode) getOwner(mode linux.FileMode) (auth.KUID, auth.KGID) { 202 // By default, set the task owner as the file owner. 203 creds := i.owner.Credentials() 204 uid := creds.EffectiveKUID 205 gid := creds.EffectiveKGID 206 207 // Linux doesn't apply dumpability adjustments to world readable/executable 208 // directories so that applications can stat /proc/PID to determine the 209 // effective UID of a process. See fs/proc/base.c:task_dump_owner. 210 if mode.FileType() == linux.ModeDirectory && mode.Permissions() == 0555 { 211 return uid, gid 212 } 213 214 // If the task is not dumpable, then root (in the namespace preferred) 215 // owns the file. 216 m := getMM(i.owner) 217 if m == nil { 218 return auth.RootKUID, auth.RootKGID 219 } 220 if m.Dumpability() != mm.UserDumpable { 221 uid = auth.RootKUID 222 if kuid := creds.UserNamespace.MapToKUID(auth.RootUID); kuid.Ok() { 223 uid = kuid 224 } 225 gid = auth.RootKGID 226 if kgid := creds.UserNamespace.MapToKGID(auth.RootGID); kgid.Ok() { 227 gid = kgid 228 } 229 } 230 return uid, gid 231 } 232 233 func newIO(t *kernel.Task, isThreadGroup bool) *ioData { 234 if isThreadGroup { 235 return &ioData{ioUsage: t.ThreadGroup()} 236 } 237 return &ioData{ioUsage: t} 238 } 239 240 // newFakeCgroupData creates an inode that shows fake cgroup 241 // information passed in as mount options. From man 7 cgroups: "For 242 // each cgroup hierarchy of which the process is a member, there is 243 // one entry containing three colon-separated fields: 244 // hierarchy-ID:controller-list:cgroup-path" 245 // 246 // TODO(b/182488796): Remove once all users adopt cgroupfs. 
247 func newFakeCgroupData(controllers map[string]string) dynamicInode { 248 var buf bytes.Buffer 249 250 // The hierarchy ids must be positive integers (for cgroup v1), but the 251 // exact number does not matter, so long as they are unique. We can 252 // just use a counter, but since linux sorts this file in descending 253 // order, we must count down to preserve this behavior. 254 i := len(controllers) 255 for name, dir := range controllers { 256 fmt.Fprintf(&buf, "%d:%s:%s\n", i, name, dir) 257 i-- 258 } 259 return newStaticFile(buf.String()) 260 }