github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/proc/tasks.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package proc 16 17 import ( 18 "bytes" 19 "sort" 20 "strconv" 21 22 "github.com/SagerNet/gvisor/pkg/abi/linux" 23 "github.com/SagerNet/gvisor/pkg/context" 24 "github.com/SagerNet/gvisor/pkg/sentry/fsimpl/kernfs" 25 "github.com/SagerNet/gvisor/pkg/sentry/kernel" 26 "github.com/SagerNet/gvisor/pkg/sentry/kernel/auth" 27 "github.com/SagerNet/gvisor/pkg/sentry/vfs" 28 "github.com/SagerNet/gvisor/pkg/syserror" 29 ) 30 31 const ( 32 selfName = "self" 33 threadSelfName = "thread-self" 34 ) 35 36 // tasksInode represents the inode for /proc/ directory. 37 // 38 // +stateify savable 39 type tasksInode struct { 40 implStatFS 41 kernfs.InodeAlwaysValid 42 kernfs.InodeAttrs 43 kernfs.InodeDirectoryNoNewChildren 44 kernfs.InodeNotSymlink 45 kernfs.InodeTemporary // This holds no meaning as this inode can't be Looked up and is always valid. 46 kernfs.OrderedChildren 47 tasksInodeRefs 48 49 locks vfs.FileLocks 50 51 fs *filesystem 52 pidns *kernel.PIDNamespace 53 54 // '/proc/self' and '/proc/thread-self' have custom directory offsets in 55 // Linux. So handle them outside of OrderedChildren. 56 57 // fakeCgroupControllers is a map of controller name to directory in the 58 // cgroup hierarchy. These controllers are immutable and will be listed 59 // in /proc/pid/cgroup if not nil. 60 fakeCgroupControllers map[string]string 61 } 62 63 var _ kernfs.Inode = (*tasksInode)(nil) 64 65 func (fs *filesystem) newTasksInode(ctx context.Context, k *kernel.Kernel, pidns *kernel.PIDNamespace, fakeCgroupControllers map[string]string) *tasksInode { 66 root := auth.NewRootCredentials(pidns.UserNamespace()) 67 contents := map[string]kernfs.Inode{ 68 "cmdline": fs.newInode(ctx, root, 0444, &cmdLineData{}), 69 "cpuinfo": fs.newInode(ctx, root, 0444, newStaticFileSetStat(cpuInfoData(k))), 70 "filesystems": fs.newInode(ctx, root, 0444, &filesystemsData{}), 71 "loadavg": fs.newInode(ctx, root, 0444, &loadavgData{}), 72 "sys": fs.newSysDir(ctx, root, k), 73 "meminfo": fs.newInode(ctx, root, 0444, &meminfoData{}), 74 "mounts": kernfs.NewStaticSymlink(ctx, root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), "self/mounts"), 75 "net": kernfs.NewStaticSymlink(ctx, root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), "self/net"), 76 "stat": fs.newInode(ctx, root, 0444, &statData{}), 77 "uptime": fs.newInode(ctx, root, 0444, &uptimeData{}), 78 "version": fs.newInode(ctx, root, 0444, &versionData{}), 79 } 80 // If fakeCgroupControllers are provided, don't create a cgroupfs backed 81 // /proc/cgroup as it will not match the fake controllers. 82 if len(fakeCgroupControllers) == 0 { 83 contents["cgroups"] = fs.newInode(ctx, root, 0444, &cgroupsData{}) 84 } 85 86 inode := &tasksInode{ 87 pidns: pidns, 88 fs: fs, 89 fakeCgroupControllers: fakeCgroupControllers, 90 } 91 inode.InodeAttrs.Init(ctx, root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555) 92 inode.InitRefs() 93 94 inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) 95 links := inode.OrderedChildren.Populate(contents) 96 inode.IncLinks(links) 97 98 return inode 99 } 100 101 // Lookup implements kernfs.inodeDirectory.Lookup. 102 func (i *tasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) { 103 // Check if a static entry was looked up. 104 if d, err := i.OrderedChildren.Lookup(ctx, name); err == nil { 105 return d, nil 106 } 107 108 // Not a static entry. Try to lookup a corresponding task. 109 tid, err := strconv.ParseUint(name, 10, 64) 110 if err != nil { 111 root := auth.NewRootCredentials(i.pidns.UserNamespace()) 112 // If it failed to parse, check if it's one of the special handled files. 113 switch name { 114 case selfName: 115 return i.newSelfSymlink(ctx, root), nil 116 case threadSelfName: 117 return i.newThreadSelfSymlink(ctx, root), nil 118 } 119 return nil, syserror.ENOENT 120 } 121 122 task := i.pidns.TaskWithID(kernel.ThreadID(tid)) 123 if task == nil { 124 return nil, syserror.ENOENT 125 } 126 127 return i.fs.newTaskInode(ctx, task, i.pidns, true, i.fakeCgroupControllers) 128 } 129 130 // IterDirents implements kernfs.inodeDirectory.IterDirents. 131 func (i *tasksInode) IterDirents(ctx context.Context, mnt *vfs.Mount, cb vfs.IterDirentsCallback, offset, _ int64) (int64, error) { 132 // fs/proc/internal.h: #define FIRST_PROCESS_ENTRY 256 133 const FIRST_PROCESS_ENTRY = 256 134 135 // Use maxTaskID to shortcut searches that will result in 0 entries. 136 const maxTaskID = kernel.TasksLimit + 1 137 if offset >= maxTaskID { 138 return offset, nil 139 } 140 141 // According to Linux (fs/proc/base.c:proc_pid_readdir()), process directories 142 // start at offset FIRST_PROCESS_ENTRY with '/proc/self', followed by 143 // '/proc/thread-self' and then '/proc/[pid]'. 144 if offset < FIRST_PROCESS_ENTRY { 145 offset = FIRST_PROCESS_ENTRY 146 } 147 148 if offset == FIRST_PROCESS_ENTRY { 149 dirent := vfs.Dirent{ 150 Name: selfName, 151 Type: linux.DT_LNK, 152 Ino: i.fs.NextIno(), 153 NextOff: offset + 1, 154 } 155 if err := cb.Handle(dirent); err != nil { 156 return offset, err 157 } 158 offset++ 159 } 160 if offset == FIRST_PROCESS_ENTRY+1 { 161 dirent := vfs.Dirent{ 162 Name: threadSelfName, 163 Type: linux.DT_LNK, 164 Ino: i.fs.NextIno(), 165 NextOff: offset + 1, 166 } 167 if err := cb.Handle(dirent); err != nil { 168 return offset, err 169 } 170 offset++ 171 } 172 173 // Collect all tasks that TGIDs are greater than the offset specified. Per 174 // Linux we only include in directory listings if it's the leader. But for 175 // whatever crazy reason, you can still walk to the given node. 176 var tids []int 177 startTid := offset - FIRST_PROCESS_ENTRY - 2 178 for _, tg := range i.pidns.ThreadGroups() { 179 tid := i.pidns.IDOfThreadGroup(tg) 180 if int64(tid) < startTid { 181 continue 182 } 183 if leader := tg.Leader(); leader != nil { 184 tids = append(tids, int(tid)) 185 } 186 } 187 188 if len(tids) == 0 { 189 return offset, nil 190 } 191 192 sort.Ints(tids) 193 for _, tid := range tids { 194 dirent := vfs.Dirent{ 195 Name: strconv.FormatUint(uint64(tid), 10), 196 Type: linux.DT_DIR, 197 Ino: i.fs.NextIno(), 198 NextOff: FIRST_PROCESS_ENTRY + 2 + int64(tid) + 1, 199 } 200 if err := cb.Handle(dirent); err != nil { 201 return offset, err 202 } 203 offset++ 204 } 205 return maxTaskID, nil 206 } 207 208 // Open implements kernfs.Inode.Open. 209 func (i *tasksInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { 210 fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), d, &i.OrderedChildren, &i.locks, &opts, kernfs.GenericDirectoryFDOptions{ 211 SeekEnd: kernfs.SeekEndZero, 212 }) 213 if err != nil { 214 return nil, err 215 } 216 return fd.VFSFileDescription(), nil 217 } 218 219 func (i *tasksInode) Stat(ctx context.Context, vsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { 220 stat, err := i.InodeAttrs.Stat(ctx, vsfs, opts) 221 if err != nil { 222 return linux.Statx{}, err 223 } 224 225 if opts.Mask&linux.STATX_NLINK != 0 { 226 // Add dynamic children to link count. 227 for _, tg := range i.pidns.ThreadGroups() { 228 if leader := tg.Leader(); leader != nil { 229 stat.Nlink++ 230 } 231 } 232 } 233 234 return stat, nil 235 } 236 237 // DecRef implements kernfs.Inode.DecRef. 238 func (i *tasksInode) DecRef(ctx context.Context) { 239 i.tasksInodeRefs.DecRef(func() { i.Destroy(ctx) }) 240 } 241 242 // staticFileSetStat implements a special static file that allows inode 243 // attributes to be set. This is to support /proc files that are readonly, but 244 // allow attributes to be set. 245 // 246 // +stateify savable 247 type staticFileSetStat struct { 248 dynamicBytesFileSetAttr 249 vfs.StaticData 250 } 251 252 var _ dynamicInode = (*staticFileSetStat)(nil) 253 254 func newStaticFileSetStat(data string) *staticFileSetStat { 255 return &staticFileSetStat{StaticData: vfs.StaticData{Data: data}} 256 } 257 258 func cpuInfoData(k *kernel.Kernel) string { 259 features := k.FeatureSet() 260 if features == nil { 261 // Kernel is always initialized with a FeatureSet. 262 panic("cpuinfo read with nil FeatureSet") 263 } 264 var buf bytes.Buffer 265 for i, max := uint(0), k.ApplicationCores(); i < max; i++ { 266 features.WriteCPUInfoTo(i, &buf) 267 } 268 return buf.String() 269 } 270 271 func shmData(v uint64) dynamicInode { 272 return newStaticFile(strconv.FormatUint(v, 10)) 273 }