github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/fsimpl/proc/tasks.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package proc 16 17 import ( 18 "bytes" 19 "sort" 20 "strconv" 21 22 "github.com/nicocha30/gvisor-ligolo/pkg/abi/linux" 23 "github.com/nicocha30/gvisor-ligolo/pkg/context" 24 "github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr" 25 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/kernfs" 26 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel" 27 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/auth" 28 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs" 29 ) 30 31 const ( 32 selfName = "self" 33 threadSelfName = "thread-self" 34 ) 35 36 // tasksInode represents the inode for /proc/ directory. 37 // 38 // +stateify savable 39 type tasksInode struct { 40 implStatFS 41 kernfs.InodeAlwaysValid 42 kernfs.InodeAttrs 43 kernfs.InodeDirectoryNoNewChildren 44 kernfs.InodeNotAnonymous 45 kernfs.InodeNotSymlink 46 kernfs.InodeTemporary // This holds no meaning as this inode can't be Looked up and is always valid. 47 kernfs.InodeWatches 48 kernfs.OrderedChildren 49 tasksInodeRefs 50 51 locks vfs.FileLocks 52 53 fs *filesystem 54 pidns *kernel.PIDNamespace 55 56 // '/proc/self' and '/proc/thread-self' have custom directory offsets in 57 // Linux. So handle them outside of OrderedChildren. 58 59 // fakeCgroupControllers is a map of controller name to directory in the 60 // cgroup hierarchy. These controllers are immutable and will be listed 61 // in /proc/pid/cgroup if not nil. 62 fakeCgroupControllers map[string]string 63 } 64 65 var _ kernfs.Inode = (*tasksInode)(nil) 66 67 func (fs *filesystem) newTasksInode(ctx context.Context, k *kernel.Kernel, pidns *kernel.PIDNamespace, fakeCgroupControllers map[string]string) *tasksInode { 68 root := auth.NewRootCredentials(pidns.UserNamespace()) 69 contents := map[string]kernfs.Inode{ 70 "cmdline": fs.newInode(ctx, root, 0444, &cmdLineData{}), 71 "cpuinfo": fs.newInode(ctx, root, 0444, newStaticFileSetStat(cpuInfoData(k))), 72 "filesystems": fs.newInode(ctx, root, 0444, &filesystemsData{}), 73 "loadavg": fs.newInode(ctx, root, 0444, &loadavgData{}), 74 "sys": fs.newSysDir(ctx, root, k), 75 "meminfo": fs.newInode(ctx, root, 0444, &meminfoData{}), 76 "mounts": kernfs.NewStaticSymlink(ctx, root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), "self/mounts"), 77 "net": kernfs.NewStaticSymlink(ctx, root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), "self/net"), 78 "sentry-meminfo": fs.newInode(ctx, root, 0444, &sentryMeminfoData{}), 79 "stat": fs.newInode(ctx, root, 0444, &statData{}), 80 "uptime": fs.newInode(ctx, root, 0444, &uptimeData{}), 81 "version": fs.newInode(ctx, root, 0444, &versionData{}), 82 } 83 // If fakeCgroupControllers are provided, don't create a cgroupfs backed 84 // /proc/cgroup as it will not match the fake controllers. 85 if len(fakeCgroupControllers) == 0 { 86 contents["cgroups"] = fs.newInode(ctx, root, 0444, &cgroupsData{}) 87 } 88 89 inode := &tasksInode{ 90 pidns: pidns, 91 fs: fs, 92 fakeCgroupControllers: fakeCgroupControllers, 93 } 94 inode.InodeAttrs.Init(ctx, root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555) 95 inode.InitRefs() 96 97 inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) 98 links := inode.OrderedChildren.Populate(contents) 99 inode.IncLinks(links) 100 101 return inode 102 } 103 104 // Lookup implements kernfs.inodeDirectory.Lookup. 105 func (i *tasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) { 106 // Check if a static entry was looked up. 107 if d, err := i.OrderedChildren.Lookup(ctx, name); err == nil { 108 return d, nil 109 } 110 111 // Not a static entry. Try to lookup a corresponding task. 112 tid, err := strconv.ParseUint(name, 10, 64) 113 if err != nil { 114 root := auth.NewRootCredentials(i.pidns.UserNamespace()) 115 // If it failed to parse, check if it's one of the special handled files. 116 switch name { 117 case selfName: 118 return i.newSelfSymlink(ctx, root), nil 119 case threadSelfName: 120 return i.newThreadSelfSymlink(ctx, root), nil 121 } 122 return nil, linuxerr.ENOENT 123 } 124 125 task := i.pidns.TaskWithID(kernel.ThreadID(tid)) 126 if task == nil { 127 return nil, linuxerr.ENOENT 128 } 129 130 return i.fs.newTaskInode(ctx, task, i.pidns, true, i.fakeCgroupControllers) 131 } 132 133 // IterDirents implements kernfs.inodeDirectory.IterDirents. 134 func (i *tasksInode) IterDirents(ctx context.Context, mnt *vfs.Mount, cb vfs.IterDirentsCallback, offset, _ int64) (int64, error) { 135 // fs/proc/internal.h: #define FIRST_PROCESS_ENTRY 256 136 const FIRST_PROCESS_ENTRY = 256 137 138 // Use maxTaskID to shortcut searches that will result in 0 entries. 139 const maxTaskID = kernel.TasksLimit + 1 140 if offset >= maxTaskID { 141 return offset, nil 142 } 143 144 // According to Linux (fs/proc/base.c:proc_pid_readdir()), process directories 145 // start at offset FIRST_PROCESS_ENTRY with '/proc/self', followed by 146 // '/proc/thread-self' and then '/proc/[pid]'. 147 if offset < FIRST_PROCESS_ENTRY { 148 offset = FIRST_PROCESS_ENTRY 149 } 150 151 if offset == FIRST_PROCESS_ENTRY { 152 dirent := vfs.Dirent{ 153 Name: selfName, 154 Type: linux.DT_LNK, 155 Ino: i.fs.NextIno(), 156 NextOff: offset + 1, 157 } 158 if err := cb.Handle(dirent); err != nil { 159 return offset, err 160 } 161 offset++ 162 } 163 if offset == FIRST_PROCESS_ENTRY+1 { 164 dirent := vfs.Dirent{ 165 Name: threadSelfName, 166 Type: linux.DT_LNK, 167 Ino: i.fs.NextIno(), 168 NextOff: offset + 1, 169 } 170 if err := cb.Handle(dirent); err != nil { 171 return offset, err 172 } 173 offset++ 174 } 175 176 // Collect all tasks that TGIDs are greater than the offset specified. Per 177 // Linux we only include in directory listings if it's the leader. But for 178 // whatever crazy reason, you can still walk to the given node. 179 var tids []int 180 startTid := offset - FIRST_PROCESS_ENTRY - 2 181 for _, tg := range i.pidns.ThreadGroups() { 182 tid := i.pidns.IDOfThreadGroup(tg) 183 if int64(tid) < startTid { 184 continue 185 } 186 if leader := tg.Leader(); leader != nil { 187 tids = append(tids, int(tid)) 188 } 189 } 190 191 if len(tids) == 0 { 192 return offset, nil 193 } 194 195 sort.Ints(tids) 196 for _, tid := range tids { 197 dirent := vfs.Dirent{ 198 Name: strconv.FormatUint(uint64(tid), 10), 199 Type: linux.DT_DIR, 200 Ino: i.fs.NextIno(), 201 NextOff: FIRST_PROCESS_ENTRY + 2 + int64(tid) + 1, 202 } 203 if err := cb.Handle(dirent); err != nil { 204 return offset, err 205 } 206 offset++ 207 } 208 return maxTaskID, nil 209 } 210 211 // Open implements kernfs.Inode.Open. 212 func (i *tasksInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { 213 fd, err := kernfs.NewGenericDirectoryFD(rp.Mount(), d, &i.OrderedChildren, &i.locks, &opts, kernfs.GenericDirectoryFDOptions{ 214 SeekEnd: kernfs.SeekEndZero, 215 }) 216 if err != nil { 217 return nil, err 218 } 219 return fd.VFSFileDescription(), nil 220 } 221 222 func (i *tasksInode) Stat(ctx context.Context, vsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { 223 stat, err := i.InodeAttrs.Stat(ctx, vsfs, opts) 224 if err != nil { 225 return linux.Statx{}, err 226 } 227 228 if opts.Mask&linux.STATX_NLINK != 0 { 229 // Add dynamic children to link count. 230 for _, tg := range i.pidns.ThreadGroups() { 231 if leader := tg.Leader(); leader != nil { 232 stat.Nlink++ 233 } 234 } 235 } 236 237 return stat, nil 238 } 239 240 // DecRef implements kernfs.Inode.DecRef. 241 func (i *tasksInode) DecRef(ctx context.Context) { 242 i.tasksInodeRefs.DecRef(func() { i.Destroy(ctx) }) 243 } 244 245 // staticFileSetStat implements a special static file that allows inode 246 // attributes to be set. This is to support /proc files that are readonly, but 247 // allow attributes to be set. 248 // 249 // +stateify savable 250 type staticFileSetStat struct { 251 dynamicBytesFileSetAttr 252 vfs.StaticData 253 } 254 255 var _ dynamicInode = (*staticFileSetStat)(nil) 256 257 func newStaticFileSetStat(data string) *staticFileSetStat { 258 return &staticFileSetStat{StaticData: vfs.StaticData{Data: data}} 259 } 260 261 func cpuInfoData(k *kernel.Kernel) string { 262 features := k.FeatureSet() 263 var buf bytes.Buffer 264 for i, max := uint(0), k.ApplicationCores(); i < max; i++ { 265 features.WriteCPUInfoTo(i, &buf) 266 } 267 return buf.String() 268 } 269 270 func ipcData(v uint64) dynamicInode { 271 return newStaticFile(strconv.FormatUint(v, 10)) 272 }