github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/proc/task.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package proc 16 17 import ( 18 "bytes" 19 "fmt" 20 "io" 21 "sort" 22 "strconv" 23 24 "github.com/SagerNet/gvisor/pkg/abi/linux" 25 "github.com/SagerNet/gvisor/pkg/context" 26 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 27 "github.com/SagerNet/gvisor/pkg/hostarch" 28 "github.com/SagerNet/gvisor/pkg/sentry/fs" 29 "github.com/SagerNet/gvisor/pkg/sentry/fs/fsutil" 30 "github.com/SagerNet/gvisor/pkg/sentry/fs/proc/device" 31 "github.com/SagerNet/gvisor/pkg/sentry/fs/proc/seqfile" 32 "github.com/SagerNet/gvisor/pkg/sentry/fs/ramfs" 33 "github.com/SagerNet/gvisor/pkg/sentry/fsbridge" 34 "github.com/SagerNet/gvisor/pkg/sentry/kernel" 35 "github.com/SagerNet/gvisor/pkg/sentry/limits" 36 "github.com/SagerNet/gvisor/pkg/sentry/mm" 37 "github.com/SagerNet/gvisor/pkg/sentry/usage" 38 "github.com/SagerNet/gvisor/pkg/syserror" 39 "github.com/SagerNet/gvisor/pkg/usermem" 40 "github.com/SagerNet/gvisor/pkg/waiter" 41 ) 42 43 // LINT.IfChange 44 45 // getTaskMM returns t's MemoryManager. If getTaskMM succeeds, the MemoryManager's 46 // users count is incremented, and must be decremented by the caller when it is 47 // no longer in use. 
func getTaskMM(t *kernel.Task) (*mm.MemoryManager, error) {
	if t.ExitState() == kernel.TaskExitDead {
		return nil, syserror.ESRCH
	}
	var m *mm.MemoryManager
	t.WithMuLocked(func(t *kernel.Task) {
		m = t.MemoryManager()
	})
	// A nil MemoryManager, or one whose user count already reached zero,
	// is reported as io.EOF so readers of mm-backed files observe an
	// empty file rather than an error.
	if m == nil || !m.IncUsers() {
		return nil, io.EOF
	}
	return m, nil
}

// checkTaskState returns an error unless t is still running: EACCES if t is
// a zombie, ESRCH if t has fully exited.
func checkTaskState(t *kernel.Task) error {
	switch t.ExitState() {
	case kernel.TaskExitZombie:
		return linuxerr.EACCES
	case kernel.TaskExitDead:
		return syserror.ESRCH
	}
	return nil
}

// taskDir represents a task-level directory.
//
// +stateify savable
type taskDir struct {
	ramfs.Dir

	// t is the task described by this directory.
	t *kernel.Task
}

var _ fs.InodeOperations = (*taskDir)(nil)

// newTaskDir creates a new proc task entry.
//
// If isThreadGroup is true, the directory describes the thread group leader
// and additionally contains the "task" subdirectory listing each thread.
func (p *proc) newTaskDir(ctx context.Context, t *kernel.Task, msrc *fs.MountSource, isThreadGroup bool) *fs.Inode {
	contents := map[string]*fs.Inode{
		"auxv":          newAuxvec(ctx, t, msrc),
		"cmdline":       newExecArgInode(ctx, t, msrc, cmdlineExecArg),
		"comm":          newComm(ctx, t, msrc),
		"cwd":           newCwd(ctx, t, msrc),
		"environ":       newExecArgInode(ctx, t, msrc, environExecArg),
		"exe":           newExe(ctx, t, msrc),
		"fd":            newFdDir(ctx, t, msrc),
		"fdinfo":        newFdInfoDir(ctx, t, msrc),
		"gid_map":       newGIDMap(ctx, t, msrc),
		"io":            newIO(ctx, t, msrc, isThreadGroup),
		"maps":          newMaps(ctx, t, msrc),
		"mem":           newMem(ctx, t, msrc),
		"mountinfo":     seqfile.NewSeqFileInode(ctx, &mountInfoFile{t: t}, msrc),
		"mounts":        seqfile.NewSeqFileInode(ctx, &mountsFile{t: t}, msrc),
		"net":           newNetDir(ctx, t, msrc),
		"ns":            newNamespaceDir(ctx, t, msrc),
		"oom_score":     newOOMScore(ctx, msrc),
		"oom_score_adj": newOOMScoreAdj(ctx, t, msrc),
		"smaps":         newSmaps(ctx, t, msrc),
		"stat":          newTaskStat(ctx, t, msrc, isThreadGroup, p.pidns),
		"statm":         newStatm(ctx, t, msrc),
		"status":        newStatus(ctx, t, msrc, p.pidns),
		"uid_map":       newUIDMap(ctx, t, msrc),
	}
	if isThreadGroup {
		contents["task"] = p.newSubtasks(ctx, t, msrc)
	}
	// "cgroup" only appears when the sandbox was started with cgroup
	// controllers configured.
	if len(p.cgroupControllers) > 0 {
		contents["cgroup"] = newCGroupInode(ctx, msrc, p.cgroupControllers)
	}

	// N.B. taskOwnedInodeOps enforces dumpability-based ownership.
	d := &taskDir{
		Dir: *ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0555)),
		t:   t,
	}
	return newProcInode(ctx, d, msrc, fs.SpecialDirectory, t)
}

// subtasks represents a /proc/TID/task directory.
//
// +stateify savable
type subtasks struct {
	ramfs.Dir

	// t is the thread group leader whose threads are listed.
	t *kernel.Task
	// p is the owning proc filesystem (provides the PID namespace).
	p *proc
}

var _ fs.InodeOperations = (*subtasks)(nil)

// newSubtasks creates the /proc/TID/task directory for t.
func (p *proc) newSubtasks(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
	// Contents are nil: entries are produced dynamically by Lookup and
	// Readdir, since threads come and go.
	s := &subtasks{
		Dir: *ramfs.NewDir(ctx, nil, fs.RootOwner, fs.FilePermsFromMode(0555)),
		t:   t,
		p:   p,
	}
	return newProcInode(ctx, s, msrc, fs.SpecialDirectory, t)
}

// UnstableAttr returns unstable attributes of the subtasks.
func (s *subtasks) UnstableAttr(ctx context.Context, inode *fs.Inode) (fs.UnstableAttr, error) {
	uattr, err := s.Dir.UnstableAttr(ctx, inode)
	if err != nil {
		return fs.UnstableAttr{}, err
	}
	// We can't rely on ramfs' implementation because the task directories are
	// generated dynamically. Link count is "." + ".." + one per live thread.
	uattr.Links = uint64(2 + s.t.ThreadGroup().Count())
	return uattr, nil
}

// GetFile implements fs.InodeOperations.GetFile.
func (s *subtasks) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
	return fs.NewFile(ctx, dirent, flags, &subtasksFile{t: s.t, pidns: s.p.pidns}), nil
}

// subtasksFile implements fs.FileOperations for a /proc/TID/task directory,
// enumerating the member threads of t's thread group.
//
// +stateify savable
type subtasksFile struct {
	fsutil.DirFileOperations        `state:"nosave"`
	fsutil.FileUseInodeUnstableAttr `state:"nosave"`

	t     *kernel.Task
	pidns *kernel.PIDNamespace
}

// Readdir implements fs.FileOperations.Readdir.
173 func (f *subtasksFile) Readdir(ctx context.Context, file *fs.File, ser fs.DentrySerializer) (int64, error) { 174 dirCtx := fs.DirCtx{ 175 Serializer: ser, 176 } 177 178 // Note that unlike most Readdir implementations, the offset here is 179 // not an index into the subtasks, but rather the TID of the next 180 // subtask to emit. 181 offset := file.Offset() 182 183 tasks := f.t.ThreadGroup().MemberIDs(f.pidns) 184 if len(tasks) == 0 { 185 return offset, syserror.ENOENT 186 } 187 188 if offset == 0 { 189 // Serialize "." and "..". 190 root := fs.RootFromContext(ctx) 191 if root != nil { 192 defer root.DecRef(ctx) 193 } 194 dot, dotdot := file.Dirent.GetDotAttrs(root) 195 if err := dirCtx.DirEmit(".", dot); err != nil { 196 return offset, err 197 } 198 if err := dirCtx.DirEmit("..", dotdot); err != nil { 199 return offset, err 200 } 201 } 202 203 // Serialize tasks. 204 taskInts := make([]int, 0, len(tasks)) 205 for _, tid := range tasks { 206 taskInts = append(taskInts, int(tid)) 207 } 208 209 sort.Sort(sort.IntSlice(taskInts)) 210 // Find the task to start at. 211 idx := sort.SearchInts(taskInts, int(offset)) 212 if idx == len(taskInts) { 213 return offset, nil 214 } 215 taskInts = taskInts[idx:] 216 217 var tid int 218 for _, tid = range taskInts { 219 name := strconv.FormatUint(uint64(tid), 10) 220 attr := fs.GenericDentAttr(fs.SpecialDirectory, device.ProcDevice) 221 if err := dirCtx.DirEmit(name, attr); err != nil { 222 // Returned offset is next tid to serialize. 223 return int64(tid), err 224 } 225 } 226 // We serialized them all. Next offset should be higher than last 227 // serialized tid. 228 return int64(tid) + 1, nil 229 } 230 231 var _ fs.FileOperations = (*subtasksFile)(nil) 232 233 // Lookup loads an Inode in a task's subtask directory into a Dirent. 
func (s *subtasks) Lookup(ctx context.Context, dir *fs.Inode, p string) (*fs.Dirent, error) {
	// Subtask entries are named by decimal TID; any other name does not
	// exist.
	tid, err := strconv.ParseUint(p, 10, 32)
	if err != nil {
		return nil, syserror.ENOENT
	}

	task := s.p.pidns.TaskWithID(kernel.ThreadID(tid))
	if task == nil {
		return nil, syserror.ENOENT
	}
	// Only threads of this directory's thread group are visible here.
	if task.ThreadGroup() != s.t.ThreadGroup() {
		return nil, syserror.ENOENT
	}

	// isThreadGroup is false: a per-thread directory has no nested "task"
	// subdirectory.
	td := s.p.newTaskDir(ctx, task, dir.MountSource, false)
	return fs.NewDirent(ctx, td, p), nil
}

// exe is an fs.InodeOperations symlink for the /proc/PID/exe file.
//
// +stateify savable
type exe struct {
	ramfs.Symlink

	// t is the task whose executable the symlink resolves to.
	t *kernel.Task
}

// newExe returns a new /proc/PID/exe symlink inode for t. The symlink target
// is computed on each Readlink, so the stored target is empty.
func newExe(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
	exeSymlink := &exe{
		Symlink: *ramfs.NewSymlink(ctx, fs.RootOwner, ""),
		t:       t,
	}
	return newProcInode(ctx, exeSymlink, msrc, fs.Symlink, t)
}

// executable returns the task's executable file. On success the caller holds
// a reference on the returned file and must DecRef it when done.
func (e *exe) executable() (file fsbridge.File, err error) {
	if err := checkTaskState(e.t); err != nil {
		return nil, err
	}
	e.t.WithMuLocked(func(t *kernel.Task) {
		mm := t.MemoryManager()
		if mm == nil {
			err = linuxerr.EACCES
			return
		}

		// The MemoryManager may be destroyed, in which case
		// MemoryManager.destroy will simply set the executable to nil
		// (with locks held).
		file = mm.Executable()
		if file == nil {
			err = syserror.ESRCH
		}
	})
	return
}

// Readlink implements fs.InodeOperations.
func (e *exe) Readlink(ctx context.Context, inode *fs.Inode) (string, error) {
	// The caller must be able to ptrace the target task (read mode).
	if !kernel.ContextCanTrace(ctx, e.t, false) {
		return "", linuxerr.EACCES
	}

	// Pull out the executable for /proc/TID/exe.
	exec, err := e.executable()
	if err != nil {
		return "", err
	}
	defer exec.DecRef(ctx)

	return exec.PathnameWithDeleted(ctx), nil
}

// cwd is an fs.InodeOperations symlink for the /proc/PID/cwd file.
//
// +stateify savable
type cwd struct {
	ramfs.Symlink

	// t is the task whose working directory the symlink resolves to.
	t *kernel.Task
}

// newCwd returns a new /proc/PID/cwd symlink inode for t. The target is
// computed on each Readlink, so the stored target is empty.
func newCwd(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
	cwdSymlink := &cwd{
		Symlink: *ramfs.NewSymlink(ctx, fs.RootOwner, ""),
		t:       t,
	}
	return newProcInode(ctx, cwdSymlink, msrc, fs.Symlink, t)
}

// Readlink implements fs.InodeOperations.
func (e *cwd) Readlink(ctx context.Context, inode *fs.Inode) (string, error) {
	// The caller must be able to ptrace the target task (read mode).
	if !kernel.ContextCanTrace(ctx, e.t, false) {
		return "", linuxerr.EACCES
	}
	if err := checkTaskState(e.t); err != nil {
		return "", err
	}
	// WorkingDirectory returns a reference that we must release.
	cwd := e.t.FSContext().WorkingDirectory()
	if cwd == nil {
		// It could have raced with process deletion.
		return "", syserror.ESRCH
	}
	defer cwd.DecRef(ctx)

	root := fs.RootFromContext(ctx)
	if root == nil {
		// It could have raced with process deletion.
		return "", syserror.ESRCH
	}
	defer root.DecRef(ctx)

	// FullName's second result reports whether the path was reachable from
	// root; the name is returned either way, matching Linux behavior for
	// unreachable cwds.
	name, _ := cwd.FullName(root)
	return name, nil
}

// namespaceSymlink represents a symlink in the namespacefs, such as the files
// in /proc/<pid>/ns.
//
// +stateify savable
type namespaceSymlink struct {
	ramfs.Symlink

	t *kernel.Task
}

// newNamespaceSymlink returns a symlink inode whose target has the
// "name:[inode]" form used by Linux for /proc/<pid>/ns entries.
func newNamespaceSymlink(ctx context.Context, t *kernel.Task, msrc *fs.MountSource, name string) *fs.Inode {
	// TODO(rahat): Namespace symlinks should contain the namespace name and the
	// inode number for the namespace instance, so for example user:[123456]. We
	// currently fake the inode number by sticking the symlink inode in its
	// place.
	target := fmt.Sprintf("%s:[%d]", name, device.ProcDevice.NextIno())
	n := &namespaceSymlink{
		Symlink: *ramfs.NewSymlink(ctx, fs.RootOwner, target),
		t:       t,
	}
	return newProcInode(ctx, n, msrc, fs.Symlink, t)
}

// Readlink reads the symlink value.
func (n *namespaceSymlink) Readlink(ctx context.Context, inode *fs.Inode) (string, error) {
	// Refuse to read the link once the task is a zombie or has exited.
	if err := checkTaskState(n.t); err != nil {
		return "", err
	}
	return n.Symlink.Readlink(ctx, inode)
}

// Getlink implements fs.InodeOperations.Getlink.
func (n *namespaceSymlink) Getlink(ctx context.Context, inode *fs.Inode) (*fs.Dirent, error) {
	// The caller must be able to ptrace the target task (read mode).
	if !kernel.ContextCanTrace(ctx, n.t, false) {
		return nil, linuxerr.EACCES
	}
	if err := checkTaskState(n.t); err != nil {
		return nil, err
	}

	// Create a new regular file to fake the namespace file.
	iops := fsutil.NewNoReadWriteFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0777), linux.PROC_SUPER_MAGIC)
	return fs.NewDirent(ctx, newProcInode(ctx, iops, inode.MountSource, fs.RegularFile, nil), n.Symlink.Target), nil
}

// newNamespaceDir returns the /proc/<pid>/ns directory containing the
// namespace symlinks supported by the sentry (net, pid, user).
func newNamespaceDir(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
	contents := map[string]*fs.Inode{
		"net":  newNamespaceSymlink(ctx, t, msrc, "net"),
		"pid":  newNamespaceSymlink(ctx, t, msrc, "pid"),
		"user": newNamespaceSymlink(ctx, t, msrc, "user"),
	}
	// 0511 matches Linux: traversal allowed, listing restricted to owner.
	d := ramfs.NewDir(ctx, contents, fs.RootOwner, fs.FilePermsFromMode(0511))
	return newProcInode(ctx, d, msrc, fs.SpecialDirectory, t)
}

// memData implements fs.Inode for /proc/[pid]/mem.
//
// +stateify savable
type memData struct {
	fsutil.SimpleFileInode

	// t is the task whose address space is exposed.
	t *kernel.Task
}

// memDataFile implements fs.FileOperations for /proc/[pid]/mem.
//
// +stateify savable
type memDataFile struct {
	fsutil.FileGenericSeek          `state:"nosave"`
	fsutil.FileNoIoctl              `state:"nosave"`
	fsutil.FileNoMMap               `state:"nosave"`
	fsutil.FileNoWrite              `state:"nosave"`
	fsutil.FileNoSplice             `state:"nosave"`
	fsutil.FileNoopFlush            `state:"nosave"`
	fsutil.FileNoopFsync            `state:"nosave"`
	fsutil.FileNoopRelease          `state:"nosave"`
	fsutil.FileNotDirReaddir        `state:"nosave"`
	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
	waiter.AlwaysReady              `state:"nosave"`

	t *kernel.Task
}

// newMem returns a /proc/[pid]/mem inode for t, readable only by its owner.
func newMem(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
	inode := &memData{
		SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0400), linux.PROC_SUPER_MAGIC),
		t:               t,
	}
	return newProcInode(ctx, inode, msrc, fs.SpecialFile, t)
}

// Truncate implements fs.InodeOperations.Truncate.
// Truncating /proc/[pid]/mem is a no-op (called for O_TRUNC opens).
func (m *memData) Truncate(context.Context, *fs.Inode, int64) error {
	return nil
}

// GetFile implements fs.InodeOperations.GetFile.
func (m *memData) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
	// TODO(github.com/SagerNet/issue/260): Add check for PTRACE_MODE_ATTACH_FSCREDS
	// Permission to read this file is governed by PTRACE_MODE_ATTACH_FSCREDS
	// Since we dont implement setfsuid/setfsgid we can just use PTRACE_MODE_ATTACH
	if !kernel.ContextCanTrace(ctx, m.t, true) {
		return nil, linuxerr.EACCES
	}
	if err := checkTaskState(m.t); err != nil {
		return nil, err
	}
	// Enable random access reads
	flags.Pread = true
	return fs.NewFile(ctx, dirent, flags, &memDataFile{t: m.t}), nil
}

// Read implements fs.FileOperations.Read.
func (m *memDataFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
	if dst.NumBytes() == 0 {
		return 0, nil
	}
	mm, err := getTaskMM(m.t)
	if err != nil {
		// NOTE(review): the error is discarded and the read reports EOF
		// (0, nil) instead of propagating ESRCH/EOF — presumably so a
		// racing task exit reads as an empty file; confirm this is
		// intentional.
		return 0, nil
	}
	defer mm.DecUsers(ctx)
	// Buffer the read data because of MM locks
	buf := make([]byte, dst.NumBytes())
	// IgnorePermissions: /proc/[pid]/mem reads bypass page protections,
	// access control was already done in GetFile via ContextCanTrace.
	n, readErr := mm.CopyIn(ctx, hostarch.Addr(offset), buf, usermem.IOOpts{IgnorePermissions: true})
	if n > 0 {
		// A partial copy is returned as a successful short read; any
		// copy-in error is surfaced on the next call.
		if _, err := dst.CopyOut(ctx, buf[:n]); err != nil {
			return 0, syserror.EFAULT
		}
		return int64(n), nil
	}
	if readErr != nil {
		return 0, syserror.EIO
	}
	return 0, nil
}

// mapsData implements seqfile.SeqSource for /proc/[pid]/maps.
//
// +stateify savable
type mapsData struct {
	t *kernel.Task
}

// newMaps returns a /proc/[pid]/maps seqfile inode for t.
func newMaps(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
	return newProcInode(ctx, seqfile.NewSeqFile(ctx, &mapsData{t}), msrc, fs.SpecialFile, t)
}

// mm returns the task's MemoryManager without taking a reference, or nil if
// the task has none.
func (md *mapsData) mm() *mm.MemoryManager {
	var tmm *mm.MemoryManager
	md.t.WithMuLocked(func(t *kernel.Task) {
		if mm := t.MemoryManager(); mm != nil {
			// No additional reference is taken on mm here. This is safe
			// because MemoryManager.destroy is required to leave the
			// MemoryManager in a state where it's still usable as a SeqSource.
			tmm = mm
		}
	})
	return tmm
}

// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
func (md *mapsData) NeedsUpdate(generation int64) bool {
	if mm := md.mm(); mm != nil {
		return mm.NeedsUpdate(generation)
	}
	// With no mm, always regenerate (the file will read as empty).
	return true
}

// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
func (md *mapsData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
	if mm := md.mm(); mm != nil {
		return mm.ReadMapsSeqFileData(ctx, h)
	}
	// No address space: the file reads as empty.
	return []seqfile.SeqData{}, 0
}

// smapsData implements seqfile.SeqSource for /proc/[pid]/smaps.
//
// +stateify savable
type smapsData struct {
	t *kernel.Task
}

// newSmaps returns a /proc/[pid]/smaps seqfile inode for t.
func newSmaps(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
	return newProcInode(ctx, seqfile.NewSeqFile(ctx, &smapsData{t}), msrc, fs.SpecialFile, t)
}

// mm returns the task's MemoryManager without taking a reference, or nil if
// the task has none.
func (sd *smapsData) mm() *mm.MemoryManager {
	var tmm *mm.MemoryManager
	sd.t.WithMuLocked(func(t *kernel.Task) {
		if mm := t.MemoryManager(); mm != nil {
			// No additional reference is taken on mm here. This is safe
			// because MemoryManager.destroy is required to leave the
			// MemoryManager in a state where it's still usable as a SeqSource.
			tmm = mm
		}
	})
	return tmm
}

// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
func (sd *smapsData) NeedsUpdate(generation int64) bool {
	if mm := sd.mm(); mm != nil {
		return mm.NeedsUpdate(generation)
	}
	// With no mm, always regenerate (the file will read as empty).
	return true
}

// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
func (sd *smapsData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
	if mm := sd.mm(); mm != nil {
		return mm.ReadSmapsSeqFileData(ctx, h)
	}
	// No address space: the file reads as empty.
	return []seqfile.SeqData{}, 0
}

// taskStatData implements seqfile.SeqSource for /proc/[pid]/stat and
// /proc/[pid]/task/[tid]/stat.
//
// +stateify savable
type taskStatData struct {
	t *kernel.Task

	// If tgstats is true, accumulate fault stats (not implemented) and CPU
	// time across all tasks in t's thread group.
	tgstats bool

	// pidns is the PID namespace associated with the proc filesystem that
	// includes the file using this statData.
	pidns *kernel.PIDNamespace
}

// newTaskStat returns a stat seqfile inode for t. showSubtasks selects
// thread-group-wide accounting (tgstats).
func newTaskStat(ctx context.Context, t *kernel.Task, msrc *fs.MountSource, showSubtasks bool, pidns *kernel.PIDNamespace) *fs.Inode {
	return newProcInode(ctx, seqfile.NewSeqFile(ctx, &taskStatData{t, showSubtasks /* tgstats */, pidns}), msrc, fs.SpecialFile, t)
}

// NeedsUpdate returns whether the generation is old or not.
// Stat contents change constantly, so the file is always regenerated.
func (s *taskStatData) NeedsUpdate(generation int64) bool {
	return true
}

// ReadSeqFileData returns data for the SeqFile reader.
// SeqData, the current generation and where in the file the handle corresponds to.
//
// The output is the single space-separated line described by proc(5) for
// /proc/[pid]/stat; unimplemented fields are emitted as 0 to keep field
// positions correct.
func (s *taskStatData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
	if h != nil {
		return nil, 0
	}

	var buf bytes.Buffer

	fmt.Fprintf(&buf, "%d ", s.pidns.IDOfTask(s.t))
	fmt.Fprintf(&buf, "(%s) ", s.t.Name())
	// Single-character state, e.g. 'R', 'S'.
	fmt.Fprintf(&buf, "%c ", s.t.StateStatus()[0])
	ppid := kernel.ThreadID(0)
	if parent := s.t.Parent(); parent != nil {
		ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
	}
	fmt.Fprintf(&buf, "%d ", ppid)
	fmt.Fprintf(&buf, "%d ", s.pidns.IDOfProcessGroup(s.t.ThreadGroup().ProcessGroup()))
	fmt.Fprintf(&buf, "%d ", s.pidns.IDOfSession(s.t.ThreadGroup().Session()))
	fmt.Fprintf(&buf, "0 0 " /* tty_nr tpgid */)
	fmt.Fprintf(&buf, "0 " /* flags */)
	fmt.Fprintf(&buf, "0 0 0 0 " /* minflt cminflt majflt cmajflt */)
	var cputime usage.CPUStats
	if s.tgstats {
		cputime = s.t.ThreadGroup().CPUStats()
	} else {
		cputime = s.t.CPUStats()
	}
	fmt.Fprintf(&buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime))
	cputime = s.t.ThreadGroup().JoinedChildCPUStats()
	fmt.Fprintf(&buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime))
	fmt.Fprintf(&buf, "%d %d ", s.t.Priority(), s.t.Niceness())
	fmt.Fprintf(&buf, "%d ", s.t.ThreadGroup().Count())

	// itrealvalue. Since kernel 2.6.17, this field is no longer
	// maintained, and is hard coded as 0.
	fmt.Fprintf(&buf, "0 ")

	// Start time is relative to boot time, expressed in clock ticks.
	fmt.Fprintf(&buf, "%d ", linux.ClockTFromDuration(s.t.StartTime().Sub(s.t.Kernel().Timekeeper().BootTime())))

	var vss, rss uint64
	s.t.WithMuLocked(func(t *kernel.Task) {
		if mm := t.MemoryManager(); mm != nil {
			vss = mm.VirtualMemorySize()
			rss = mm.ResidentSetSize()
		}
	})
	// vsize is in bytes, rss in pages.
	fmt.Fprintf(&buf, "%d %d ", vss, rss/hostarch.PageSize)

	// rsslim.
	fmt.Fprintf(&buf, "%d ", s.t.ThreadGroup().Limits().Get(limits.Rss).Cur)

	fmt.Fprintf(&buf, "0 0 0 0 0 " /* startcode endcode startstack kstkesp kstkeip */)
	fmt.Fprintf(&buf, "0 0 0 0 0 " /* signal blocked sigignore sigcatch wchan */)
	fmt.Fprintf(&buf, "0 0 " /* nswap cnswap */)
	// exit_signal is only meaningful for the thread group leader.
	terminationSignal := linux.Signal(0)
	if s.t == s.t.ThreadGroup().Leader() {
		terminationSignal = s.t.ThreadGroup().TerminationSignal()
	}
	fmt.Fprintf(&buf, "%d ", terminationSignal)
	fmt.Fprintf(&buf, "0 0 0 " /* processor rt_priority policy */)
	fmt.Fprintf(&buf, "0 0 0 " /* delayacct_blkio_ticks guest_time cguest_time */)
	fmt.Fprintf(&buf, "0 0 0 0 0 0 0 " /* start_data end_data start_brk arg_start arg_end env_start env_end */)
	fmt.Fprintf(&buf, "0\n" /* exit_code */)

	return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*taskStatData)(nil)}}, 0
}

// statmData implements seqfile.SeqSource for /proc/[pid]/statm.
//
// +stateify savable
type statmData struct {
	t *kernel.Task
}

// newStatm returns a /proc/[pid]/statm seqfile inode for t.
func newStatm(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
	return newProcInode(ctx, seqfile.NewSeqFile(ctx, &statmData{t}), msrc, fs.SpecialFile, t)
}

// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
// Memory counters change constantly, so the file is always regenerated.
func (s *statmData) NeedsUpdate(generation int64) bool {
	return true
}

// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
// Emits the seven statm fields (in pages); only size and resident are
// implemented, the rest are reported as 0.
func (s *statmData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
	if h != nil {
		return nil, 0
	}

	var vss, rss uint64
	s.t.WithMuLocked(func(t *kernel.Task) {
		if mm := t.MemoryManager(); mm != nil {
			vss = mm.VirtualMemorySize()
			rss = mm.ResidentSetSize()
		}
	})

	var buf bytes.Buffer
	fmt.Fprintf(&buf, "%d %d 0 0 0 0 0\n", vss/hostarch.PageSize, rss/hostarch.PageSize)

	return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*statmData)(nil)}}, 0
}

// statusData implements seqfile.SeqSource for /proc/[pid]/status.
//
// +stateify savable
type statusData struct {
	t     *kernel.Task
	pidns *kernel.PIDNamespace
}

// newStatus returns a /proc/[pid]/status seqfile inode for t, with IDs
// translated through pidns.
func newStatus(ctx context.Context, t *kernel.Task, msrc *fs.MountSource, pidns *kernel.PIDNamespace) *fs.Inode {
	return newProcInode(ctx, seqfile.NewSeqFile(ctx, &statusData{t, pidns}), msrc, fs.SpecialFile, t)
}

// NeedsUpdate implements seqfile.SeqSource.NeedsUpdate.
// Status contents change constantly, so the file is always regenerated.
func (s *statusData) NeedsUpdate(generation int64) bool {
	return true
}

// ReadSeqFileData implements seqfile.SeqSource.ReadSeqFileData.
// Emits the "Key:\tvalue" lines of /proc/[pid]/status for the fields the
// sentry implements.
func (s *statusData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
	if h != nil {
		return nil, 0
	}

	var buf bytes.Buffer
	fmt.Fprintf(&buf, "Name:\t%s\n", s.t.Name())
	fmt.Fprintf(&buf, "State:\t%s\n", s.t.StateStatus())
	fmt.Fprintf(&buf, "Tgid:\t%d\n", s.pidns.IDOfThreadGroup(s.t.ThreadGroup()))
	fmt.Fprintf(&buf, "Pid:\t%d\n", s.pidns.IDOfTask(s.t))
	// PPid is 0 when the task has no parent (e.g. init).
	ppid := kernel.ThreadID(0)
	if parent := s.t.Parent(); parent != nil {
		ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
	}
	fmt.Fprintf(&buf, "PPid:\t%d\n", ppid)
	// TracerPid is 0 when the task is not being ptraced.
	tpid := kernel.ThreadID(0)
	if tracer := s.t.Tracer(); tracer != nil {
		tpid = s.pidns.IDOfTask(tracer)
	}
	fmt.Fprintf(&buf, "TracerPid:\t%d\n", tpid)
	var fds int
	var vss, rss, data uint64
	s.t.WithMuLocked(func(t *kernel.Task) {
		if fdTable := t.FDTable(); fdTable != nil {
			fds = fdTable.CurrentMaxFDs()
		}
		if mm := t.MemoryManager(); mm != nil {
			vss = mm.VirtualMemorySize()
			rss = mm.ResidentSetSize()
			data = mm.VirtualDataSize()
		}
	})
	fmt.Fprintf(&buf, "FDSize:\t%d\n", fds)
	// Memory sizes are reported in kB (>>10 converts bytes to KiB).
	fmt.Fprintf(&buf, "VmSize:\t%d kB\n", vss>>10)
	fmt.Fprintf(&buf, "VmRSS:\t%d kB\n", rss>>10)
	fmt.Fprintf(&buf, "VmData:\t%d kB\n", data>>10)
	fmt.Fprintf(&buf, "Threads:\t%d\n", s.t.ThreadGroup().Count())
	creds := s.t.Credentials()
	fmt.Fprintf(&buf, "CapInh:\t%016x\n", creds.InheritableCaps)
	fmt.Fprintf(&buf, "CapPrm:\t%016x\n", creds.PermittedCaps)
	fmt.Fprintf(&buf, "CapEff:\t%016x\n", creds.EffectiveCaps)
	fmt.Fprintf(&buf, "CapBnd:\t%016x\n", creds.BoundingCaps)
	fmt.Fprintf(&buf, "Seccomp:\t%d\n", s.t.SeccompMode())
	// We unconditionally report a single NUMA node. See
	// pkg/sentry/syscalls/linux/sys_mempolicy.go.
	fmt.Fprintf(&buf, "Mems_allowed:\t1\n")
	fmt.Fprintf(&buf, "Mems_allowed_list:\t0\n")
	return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*statusData)(nil)}}, 0
}

// ioUsage is the /proc/<pid>/io and /proc/<pid>/task/<tid>/io data provider.
type ioUsage interface {
	// IOUsage returns the io usage data.
	IOUsage() *usage.IO
}

// ioData implements seqfile.SeqSource for /proc/[pid]/io, backed by either a
// task or a thread group (both satisfy ioUsage).
//
// +stateify savable
type ioData struct {
	ioUsage
}

// newIO returns an io seqfile inode: thread-group-wide stats when
// isThreadGroup is set, otherwise per-task stats.
func newIO(ctx context.Context, t *kernel.Task, msrc *fs.MountSource, isThreadGroup bool) *fs.Inode {
	if isThreadGroup {
		return newProcInode(ctx, seqfile.NewSeqFile(ctx, &ioData{t.ThreadGroup()}), msrc, fs.SpecialFile, t)
	}
	return newProcInode(ctx, seqfile.NewSeqFile(ctx, &ioData{t}), msrc, fs.SpecialFile, t)
}

// NeedsUpdate returns whether the generation is old or not.
// I/O counters change constantly, so the file is always regenerated.
func (i *ioData) NeedsUpdate(generation int64) bool {
	return true
}

// ReadSeqFileData returns data for the SeqFile reader.
// SeqData, the current generation and where in the file the handle corresponds to.
//
// The keys and order match Linux's /proc/[pid]/io.
func (i *ioData) ReadSeqFileData(ctx context.Context, h seqfile.SeqHandle) ([]seqfile.SeqData, int64) {
	if h != nil {
		return nil, 0
	}

	io := usage.IO{}
	io.Accumulate(i.IOUsage())

	var buf bytes.Buffer
	fmt.Fprintf(&buf, "rchar: %d\n", io.CharsRead)
	fmt.Fprintf(&buf, "wchar: %d\n", io.CharsWritten)
	fmt.Fprintf(&buf, "syscr: %d\n", io.ReadSyscalls)
	fmt.Fprintf(&buf, "syscw: %d\n", io.WriteSyscalls)
	fmt.Fprintf(&buf, "read_bytes: %d\n", io.BytesRead)
	fmt.Fprintf(&buf, "write_bytes: %d\n", io.BytesWritten)
	fmt.Fprintf(&buf, "cancelled_write_bytes: %d\n", io.BytesWriteCancelled)

	return []seqfile.SeqData{{Buf: buf.Bytes(), Handle: (*ioData)(nil)}}, 0
}

// comm is a file containing the command name for a task.
//
// On Linux, /proc/[pid]/comm is writable, and writing to the comm file changes
// the thread name. We don't implement this yet as there are no known users of
// this feature.
//
// +stateify savable
type comm struct {
	fsutil.SimpleFileInode

	// t is the task whose name is exposed.
	t *kernel.Task
}

// newComm returns a new comm file.
func newComm(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
	c := &comm{
		SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC),
		t:               t,
	}
	return newProcInode(ctx, c, msrc, fs.SpecialFile, t)
}

// Check implements fs.InodeOperations.Check.
func (c *comm) Check(ctx context.Context, inode *fs.Inode, p fs.PermMask) bool {
	// This file can always be read or written by members of the same
	// thread group. See fs/proc/base.c:proc_tid_comm_permission.
	//
	// N.B. This check is currently a no-op as we don't yet support writing
	// and this file is world-readable anyways.
	t := kernel.TaskFromContext(ctx)
	if t != nil && t.ThreadGroup() == c.t.ThreadGroup() && !p.Execute {
		return true
	}

	return fs.ContextCanAccessFile(ctx, inode, p)
}

// GetFile implements fs.InodeOperations.GetFile.
func (c *comm) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
	return fs.NewFile(ctx, dirent, flags, &commFile{t: c.t}), nil
}

// commFile implements fs.FileOperations for /proc/[pid]/comm (read-only).
//
// +stateify savable
type commFile struct {
	fsutil.FileGenericSeek          `state:"nosave"`
	fsutil.FileNoIoctl              `state:"nosave"`
	fsutil.FileNoMMap               `state:"nosave"`
	fsutil.FileNoSplice             `state:"nosave"`
	fsutil.FileNoWrite              `state:"nosave"`
	fsutil.FileNoopFlush            `state:"nosave"`
	fsutil.FileNoopFsync            `state:"nosave"`
	fsutil.FileNoopRelease          `state:"nosave"`
	fsutil.FileNotDirReaddir        `state:"nosave"`
	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
	waiter.AlwaysReady              `state:"nosave"`

	t *kernel.Task
}

var _ fs.FileOperations = (*commFile)(nil)

// Read implements fs.FileOperations.Read.
// The file contents are the task name followed by a newline; reads past the
// end return io.EOF.
func (f *commFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
	if offset < 0 {
		return 0, linuxerr.EINVAL
	}

	// The name is regenerated on every read, so a racing rename is
	// reflected immediately.
	buf := []byte(f.t.Name() + "\n")
	if offset >= int64(len(buf)) {
		return 0, io.EOF
	}

	n, err := dst.CopyOut(ctx, buf[offset:])
	return int64(n), err
}

// auxvec is a file containing the auxiliary vector for a task.
//
// +stateify savable
type auxvec struct {
	fsutil.SimpleFileInode

	// t is the task whose auxv is exposed.
	t *kernel.Task
}

// newAuxvec returns a new auxvec file.
func newAuxvec(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
	a := &auxvec{
		SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0444), linux.PROC_SUPER_MAGIC),
		t:               t,
	}
	return newProcInode(ctx, a, msrc, fs.SpecialFile, t)
}

// GetFile implements fs.InodeOperations.GetFile.
func (a *auxvec) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
	return fs.NewFile(ctx, dirent, flags, &auxvecFile{t: a.t}), nil
}

// auxvecFile implements fs.FileOperations for /proc/[pid]/auxv (read-only).
//
// +stateify savable
type auxvecFile struct {
	fsutil.FileGenericSeek          `state:"nosave"`
	fsutil.FileNoIoctl              `state:"nosave"`
	fsutil.FileNoMMap               `state:"nosave"`
	fsutil.FileNoSplice             `state:"nosave"`
	fsutil.FileNoWrite              `state:"nosave"`
	fsutil.FileNoopFlush            `state:"nosave"`
	fsutil.FileNoopFsync            `state:"nosave"`
	fsutil.FileNoopRelease          `state:"nosave"`
	fsutil.FileNotDirReaddir        `state:"nosave"`
	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
	waiter.AlwaysReady              `state:"nosave"`

	t *kernel.Task
}

// Read implements fs.FileOperations.Read.
// The contents are the raw auxv entries as (key, value) uint64 pairs in
// native byte order, terminated by a zeroed AT_NULL pair.
func (f *auxvecFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
	if offset < 0 {
		return 0, linuxerr.EINVAL
	}

	m, err := getTaskMM(f.t)
	if err != nil {
		return 0, err
	}
	defer m.DecUsers(ctx)
	auxv := m.Auxv()

	// Space for buffer with AT_NULL (0) terminator at the end.
	// Each entry is 16 bytes: two 64-bit words.
	size := (len(auxv) + 1) * 16
	if offset >= int64(size) {
		return 0, io.EOF
	}

	// The final 16 bytes are left zeroed, forming the AT_NULL terminator.
	buf := make([]byte, size)
	for i, e := range auxv {
		hostarch.ByteOrder.PutUint64(buf[16*i:], e.Key)
		hostarch.ByteOrder.PutUint64(buf[16*i+8:], uint64(e.Value))
	}

	n, err := dst.CopyOut(ctx, buf[offset:])
	return int64(n), err
}

// newOOMScore returns a oom_score file. It is a stub that always returns 0.
// TODO(github.com/SagerNet/issue/1967)
func newOOMScore(ctx context.Context, msrc *fs.MountSource) *fs.Inode {
	return newStaticProcInode(ctx, msrc, []byte("0\n"))
}

// oomScoreAdj is a file containing the oom_score adjustment for a task.
//
// +stateify savable
type oomScoreAdj struct {
	fsutil.SimpleFileInode

	// t is the task whose oom_score_adj is exposed.
	t *kernel.Task
}

// oomScoreAdjFile implements fs.FileOperations for /proc/[pid]/oom_score_adj
// (readable and writable).
//
// +stateify savable
type oomScoreAdjFile struct {
	fsutil.FileGenericSeek          `state:"nosave"`
	fsutil.FileNoIoctl              `state:"nosave"`
	fsutil.FileNoMMap               `state:"nosave"`
	fsutil.FileNoSplice             `state:"nosave"`
	fsutil.FileNoopFlush            `state:"nosave"`
	fsutil.FileNoopFsync            `state:"nosave"`
	fsutil.FileNoopRelease          `state:"nosave"`
	fsutil.FileNotDirReaddir        `state:"nosave"`
	fsutil.FileUseInodeUnstableAttr `state:"nosave"`
	waiter.AlwaysReady              `state:"nosave"`

	t *kernel.Task
}

// newOOMScoreAdj returns a oom_score_adj file.
func newOOMScoreAdj(ctx context.Context, t *kernel.Task, msrc *fs.MountSource) *fs.Inode {
	i := &oomScoreAdj{
		SimpleFileInode: *fsutil.NewSimpleFileInode(ctx, fs.RootOwner, fs.FilePermsFromMode(0644), linux.PROC_SUPER_MAGIC),
		t:               t,
	}
	return newProcInode(ctx, i, msrc, fs.SpecialFile, t)
}

// Truncate implements fs.InodeOperations.Truncate. Truncate is called when
// O_TRUNC is specified for any kind of existing Dirent but is not called via
// (f)truncate for proc files.
func (*oomScoreAdj) Truncate(context.Context, *fs.Inode, int64) error {
	return nil
}

// GetFile implements fs.InodeOperations.GetFile.
func (o *oomScoreAdj) GetFile(ctx context.Context, dirent *fs.Dirent, flags fs.FileFlags) (*fs.File, error) {
	return fs.NewFile(ctx, dirent, flags, &oomScoreAdjFile{t: o.t}), nil
}

// Read implements fs.FileOperations.Read.
// The contents are the task's current oom_score_adj value followed by a
// newline.
func (f *oomScoreAdjFile) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
	if f.t.ExitState() == kernel.TaskExitDead {
		return 0, syserror.ESRCH
	}
	var buf bytes.Buffer
	fmt.Fprintf(&buf, "%d\n", f.t.OOMScoreAdj())
	if offset >= int64(buf.Len()) {
		return 0, io.EOF
	}
	n, err := dst.CopyOut(ctx, buf.Bytes()[offset:])
	return int64(n), err
}

// Write implements fs.FileOperations.Write.
// Parses a decimal int32 from the input and stores it via SetOOMScoreAdj;
// range validation is delegated to the task.
func (f *oomScoreAdjFile) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
	if src.NumBytes() == 0 {
		return 0, nil
	}

	// Limit input size so as not to impact performance if input size is large.
	src = src.TakeFirst(hostarch.PageSize - 1)

	var v int32
	n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
	if err != nil {
		return 0, err
	}

	// NOTE(review): this exit-state check races with task exit (the task
	// may die between the check and SetOOMScoreAdj); presumably benign —
	// confirm SetOOMScoreAdj tolerates exited tasks.
	if f.t.ExitState() == kernel.TaskExitDead {
		return 0, syserror.ESRCH
	}
	if err := f.t.SetOOMScoreAdj(v); err != nil {
		return 0, err
	}

	return n, nil
}

// LINT.ThenChange(../../fsimpl/proc/task.go|../../fsimpl/proc/task_files.go)