github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/syscalls/linux/sys_thread.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package linux 16 17 import ( 18 "github.com/nicocha30/gvisor-ligolo/pkg/abi/linux" 19 "github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr" 20 "github.com/nicocha30/gvisor-ligolo/pkg/fspath" 21 "github.com/nicocha30/gvisor-ligolo/pkg/hostarch" 22 "github.com/nicocha30/gvisor-ligolo/pkg/marshal/primitive" 23 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/arch" 24 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel" 25 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/sched" 26 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/loader" 27 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/seccheck" 28 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs" 29 "github.com/nicocha30/gvisor-ligolo/pkg/usermem" 30 ) 31 32 var ( 33 // ExecMaxTotalSize is the maximum length of all argv and envv entries. 34 // 35 // N.B. The behavior here is different than Linux. Linux provides a limit on 36 // individual arguments of 32 pages, and an aggregate limit of at least 32 pages 37 // but otherwise bounded by min(stack size / 4, 8 MB * 3 / 4). We don't implement 38 // any behavior based on the stack size, and instead provide a fixed hard-limit of 39 // 2 MB (which should work well given that 8 MB stack limits are common). 40 ExecMaxTotalSize = 2 * 1024 * 1024 41 42 // ExecMaxElemSize is the maximum length of a single argv or envv entry. 43 ExecMaxElemSize = 32 * hostarch.PageSize 44 ) 45 46 // Getppid implements linux syscall getppid(2). 47 func Getppid(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 48 parent := t.Parent() 49 if parent == nil { 50 return 0, nil, nil 51 } 52 return uintptr(t.PIDNamespace().IDOfThreadGroup(parent.ThreadGroup())), nil, nil 53 } 54 55 // Getpid implements linux syscall getpid(2). 56 func Getpid(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 57 return uintptr(t.ThreadGroup().ID()), nil, nil 58 } 59 60 // Gettid implements linux syscall gettid(2). 61 func Gettid(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 62 return uintptr(t.ThreadID()), nil, nil 63 } 64 65 // Execve implements linux syscall execve(2). 66 func Execve(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 67 pathnameAddr := args[0].Pointer() 68 argvAddr := args[1].Pointer() 69 envvAddr := args[2].Pointer() 70 return execveat(t, linux.AT_FDCWD, pathnameAddr, argvAddr, envvAddr, 0 /* flags */) 71 } 72 73 // Execveat implements linux syscall execveat(2). 74 func Execveat(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 75 dirfd := args[0].Int() 76 pathnameAddr := args[1].Pointer() 77 argvAddr := args[2].Pointer() 78 envvAddr := args[3].Pointer() 79 flags := args[4].Int() 80 return execveat(t, dirfd, pathnameAddr, argvAddr, envvAddr, flags) 81 } 82 83 func execveat(t *kernel.Task, dirfd int32, pathnameAddr, argvAddr, envvAddr hostarch.Addr, flags int32) (uintptr, *kernel.SyscallControl, error) { 84 if flags&^(linux.AT_EMPTY_PATH|linux.AT_SYMLINK_NOFOLLOW) != 0 { 85 return 0, nil, linuxerr.EINVAL 86 } 87 88 pathname, err := t.CopyInString(pathnameAddr, linux.PATH_MAX) 89 if err != nil { 90 return 0, nil, err 91 } 92 var argv, envv []string 93 if argvAddr != 0 { 94 var err error 95 argv, err = t.CopyInVector(argvAddr, ExecMaxElemSize, ExecMaxTotalSize) 96 if err != nil { 97 return 0, nil, err 98 } 99 } 100 if envvAddr != 0 { 101 var err error 102 envv, err = t.CopyInVector(envvAddr, ExecMaxElemSize, ExecMaxTotalSize) 103 if err != nil { 104 return 0, nil, err 105 } 106 } 107 108 root := t.FSContext().RootDirectory() 109 defer root.DecRef(t) 110 var executable *vfs.FileDescription 111 defer func() { 112 if executable != nil { 113 executable.DecRef(t) 114 } 115 }() 116 closeOnExec := false 117 if path := fspath.Parse(pathname); dirfd != linux.AT_FDCWD && !path.Absolute { 118 // We must open the executable ourselves since dirfd is used as the 119 // starting point while resolving path, but the task working directory 120 // is used as the starting point while resolving interpreters (Linux: 121 // fs/binfmt_script.c:load_script() => fs/exec.c:open_exec() => 122 // do_open_execat(fd=AT_FDCWD)), and the loader package is currently 123 // incapable of handling this correctly. 124 if !path.HasComponents() && flags&linux.AT_EMPTY_PATH == 0 { 125 return 0, nil, linuxerr.ENOENT 126 } 127 dirfile, dirfileFlags := t.FDTable().Get(dirfd) 128 if dirfile == nil { 129 return 0, nil, linuxerr.EBADF 130 } 131 start := dirfile.VirtualDentry() 132 start.IncRef() 133 dirfile.DecRef(t) 134 closeOnExec = dirfileFlags.CloseOnExec 135 file, err := t.Kernel().VFS().OpenAt(t, t.Credentials(), &vfs.PathOperation{ 136 Root: root, 137 Start: start, 138 Path: path, 139 FollowFinalSymlink: flags&linux.AT_SYMLINK_NOFOLLOW == 0, 140 }, &vfs.OpenOptions{ 141 Flags: linux.O_RDONLY, 142 FileExec: true, 143 }) 144 start.DecRef(t) 145 if err != nil { 146 return 0, nil, err 147 } 148 executable = file 149 pathname = executable.MappedName(t) 150 } 151 152 // Load the new TaskImage. 153 wd := t.FSContext().WorkingDirectory() 154 defer wd.DecRef(t) 155 remainingTraversals := uint(linux.MaxSymlinkTraversals) 156 loadArgs := loader.LoadArgs{ 157 Root: root, 158 WorkingDir: wd, 159 RemainingTraversals: &remainingTraversals, 160 ResolveFinal: flags&linux.AT_SYMLINK_NOFOLLOW == 0, 161 Filename: pathname, 162 File: executable, 163 CloseOnExec: closeOnExec, 164 Argv: argv, 165 Envv: envv, 166 Features: t.Kernel().FeatureSet(), 167 } 168 if seccheck.Global.Enabled(seccheck.PointExecve) { 169 // Retain the first executable file that is opened (which may open 170 // multiple executable files while resolving interpreter scripts). 171 if executable == nil { 172 loadArgs.AfterOpen = func(f *vfs.FileDescription) { 173 if executable == nil { 174 f.IncRef() 175 executable = f 176 pathname = executable.MappedName(t) 177 } 178 } 179 } 180 } 181 182 image, se := t.Kernel().LoadTaskImage(t, loadArgs) 183 if se != nil { 184 return 0, nil, se.ToError() 185 } 186 187 ctrl, err := t.Execve(image, argv, envv, executable, pathname) 188 return 0, ctrl, err 189 } 190 191 // Exit implements linux syscall exit(2). 192 func Exit(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 193 status := args[0].Int() 194 t.PrepareExit(linux.WaitStatusExit(status & 0xff)) 195 return 0, kernel.CtrlDoExit, nil 196 } 197 198 // ExitGroup implements linux syscall exit_group(2). 199 func ExitGroup(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 200 status := args[0].Int() 201 t.PrepareGroupExit(linux.WaitStatusExit(status & 0xff)) 202 return 0, kernel.CtrlDoExit, nil 203 } 204 205 // clone is used by Clone, Fork, and VFork. 206 func clone(t *kernel.Task, flags int, stack hostarch.Addr, parentTID hostarch.Addr, childTID hostarch.Addr, tls hostarch.Addr) (uintptr, *kernel.SyscallControl, error) { 207 args := linux.CloneArgs{ 208 Flags: uint64(uint32(flags) &^ linux.CSIGNAL), 209 Pidfd: uint64(parentTID), 210 ChildTID: uint64(childTID), 211 ParentTID: uint64(parentTID), 212 ExitSignal: uint64(flags & linux.CSIGNAL), 213 Stack: uint64(stack), 214 TLS: uint64(tls), 215 } 216 ntid, ctrl, err := t.Clone(&args) 217 return uintptr(ntid), ctrl, err 218 } 219 220 // Fork implements Linux syscall fork(2). 221 func Fork(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 222 // "A call to fork() is equivalent to a call to clone(2) specifying flags 223 // as just SIGCHLD." - fork(2) 224 return clone(t, int(linux.SIGCHLD), 0, 0, 0, 0) 225 } 226 227 // Vfork implements Linux syscall vfork(2). 228 func Vfork(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 229 // """ 230 // A call to vfork() is equivalent to calling clone(2) with flags specified as: 231 // 232 // CLONE_VM | CLONE_VFORK | SIGCHLD 233 // """ - vfork(2) 234 return clone(t, linux.CLONE_VM|linux.CLONE_VFORK|int(linux.SIGCHLD), 0, 0, 0, 0) 235 } 236 237 // parseCommonWaitOptions applies the options common to wait4 and waitid to 238 // wopts. 239 func parseCommonWaitOptions(wopts *kernel.WaitOptions, options int) error { 240 switch options & (linux.WCLONE | linux.WALL) { 241 case 0: 242 wopts.NonCloneTasks = true 243 case linux.WCLONE: 244 wopts.CloneTasks = true 245 case linux.WALL: 246 wopts.NonCloneTasks = true 247 wopts.CloneTasks = true 248 default: 249 return linuxerr.EINVAL 250 } 251 if options&linux.WCONTINUED != 0 { 252 wopts.Events |= kernel.EventGroupContinue 253 } 254 if options&linux.WNOHANG == 0 { 255 wopts.BlockInterruptErr = linuxerr.ERESTARTSYS 256 } 257 if options&linux.WNOTHREAD == 0 { 258 wopts.SiblingChildren = true 259 } 260 return nil 261 } 262 263 // wait4 waits for the given child process to exit. 264 func wait4(t *kernel.Task, pid int, statusAddr hostarch.Addr, options int, rusageAddr hostarch.Addr) (uintptr, error) { 265 if options&^(linux.WNOHANG|linux.WUNTRACED|linux.WCONTINUED|linux.WNOTHREAD|linux.WALL|linux.WCLONE) != 0 { 266 return 0, linuxerr.EINVAL 267 } 268 wopts := kernel.WaitOptions{ 269 Events: kernel.EventExit | kernel.EventTraceeStop, 270 ConsumeEvent: true, 271 } 272 // There are four cases to consider: 273 // 274 // pid < -1 any child process whose process group ID is equal to the absolute value of pid 275 // pid == -1 any child process 276 // pid == 0 any child process whose process group ID is equal to that of the calling process 277 // pid > 0 the child whose process ID is equal to the value of pid 278 switch { 279 case pid < -1: 280 wopts.SpecificPGID = kernel.ProcessGroupID(-pid) 281 case pid == -1: 282 // Any process is the default. 283 case pid == 0: 284 wopts.SpecificPGID = t.PIDNamespace().IDOfProcessGroup(t.ThreadGroup().ProcessGroup()) 285 default: 286 wopts.SpecificTID = kernel.ThreadID(pid) 287 } 288 289 if err := parseCommonWaitOptions(&wopts, options); err != nil { 290 return 0, err 291 } 292 if options&linux.WUNTRACED != 0 { 293 wopts.Events |= kernel.EventChildGroupStop 294 } 295 296 wr, err := t.Wait(&wopts) 297 if err != nil { 298 if err == kernel.ErrNoWaitableEvent { 299 return 0, nil 300 } 301 return 0, err 302 } 303 if statusAddr != 0 { 304 if _, err := primitive.CopyUint32Out(t, statusAddr, uint32(wr.Status)); err != nil { 305 return 0, err 306 } 307 } 308 if rusageAddr != 0 { 309 ru := getrusage(wr.Task, linux.RUSAGE_BOTH) 310 if _, err := ru.CopyOut(t, rusageAddr); err != nil { 311 return 0, err 312 } 313 } 314 return uintptr(wr.TID), nil 315 } 316 317 // Wait4 implements linux syscall wait4(2). 318 func Wait4(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 319 pid := int(args[0].Int()) 320 statusAddr := args[1].Pointer() 321 options := int(args[2].Uint()) 322 rusageAddr := args[3].Pointer() 323 324 n, err := wait4(t, pid, statusAddr, options, rusageAddr) 325 return n, nil, err 326 } 327 328 // WaitPid implements linux syscall waitpid(2). 329 func WaitPid(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 330 pid := int(args[0].Int()) 331 statusAddr := args[1].Pointer() 332 options := int(args[2].Uint()) 333 334 n, err := wait4(t, pid, statusAddr, options, 0) 335 return n, nil, err 336 } 337 338 // Waitid implements linux syscall waitid(2). 339 func Waitid(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 340 idtype := args[0].Int() 341 id := args[1].Int() 342 infop := args[2].Pointer() 343 options := int(args[3].Uint()) 344 rusageAddr := args[4].Pointer() 345 346 if options&^(linux.WNOHANG|linux.WEXITED|linux.WSTOPPED|linux.WCONTINUED|linux.WNOWAIT|linux.WNOTHREAD|linux.WALL|linux.WCLONE) != 0 { 347 return 0, nil, linuxerr.EINVAL 348 } 349 if options&(linux.WEXITED|linux.WSTOPPED|linux.WCONTINUED) == 0 { 350 return 0, nil, linuxerr.EINVAL 351 } 352 wopts := kernel.WaitOptions{ 353 Events: kernel.EventTraceeStop, 354 ConsumeEvent: options&linux.WNOWAIT == 0, 355 } 356 switch idtype { 357 case linux.P_ALL: 358 case linux.P_PID: 359 wopts.SpecificTID = kernel.ThreadID(id) 360 case linux.P_PGID: 361 wopts.SpecificPGID = kernel.ProcessGroupID(id) 362 default: 363 return 0, nil, linuxerr.EINVAL 364 } 365 366 if err := parseCommonWaitOptions(&wopts, options); err != nil { 367 return 0, nil, err 368 } 369 if options&linux.WEXITED != 0 { 370 wopts.Events |= kernel.EventExit 371 } 372 if options&linux.WSTOPPED != 0 { 373 wopts.Events |= kernel.EventChildGroupStop 374 } 375 376 wr, err := t.Wait(&wopts) 377 if err != nil { 378 if err == kernel.ErrNoWaitableEvent { 379 err = nil 380 // "If WNOHANG was specified in options and there were no children 381 // in a waitable state, then waitid() returns 0 immediately and the 382 // state of the siginfo_t structure pointed to by infop is 383 // unspecified." - waitid(2). But Linux's waitid actually zeroes 384 // out the fields it would set for a successful waitid in this case 385 // as well. 386 if infop != 0 { 387 var si linux.SignalInfo 388 _, err = si.CopyOut(t, infop) 389 } 390 } 391 return 0, nil, err 392 } 393 if rusageAddr != 0 { 394 ru := getrusage(wr.Task, linux.RUSAGE_BOTH) 395 if _, err := ru.CopyOut(t, rusageAddr); err != nil { 396 return 0, nil, err 397 } 398 } 399 if infop == 0 { 400 return 0, nil, nil 401 } 402 si := linux.SignalInfo{ 403 Signo: int32(linux.SIGCHLD), 404 } 405 si.SetPID(int32(wr.TID)) 406 si.SetUID(int32(wr.UID)) 407 s := wr.Status 408 switch { 409 case s.Exited(): 410 si.Code = linux.CLD_EXITED 411 si.SetStatus(int32(s.ExitStatus())) 412 case s.Signaled(): 413 if s.CoreDumped() { 414 si.Code = linux.CLD_DUMPED 415 } else { 416 si.Code = linux.CLD_KILLED 417 } 418 si.SetStatus(int32(s.TerminationSignal())) 419 case s.Stopped(): 420 if wr.Event == kernel.EventTraceeStop { 421 si.Code = linux.CLD_TRAPPED 422 si.SetStatus(int32(s.PtraceEvent())) 423 } else { 424 si.Code = linux.CLD_STOPPED 425 si.SetStatus(int32(s.StopSignal())) 426 } 427 case s.Continued(): 428 si.Code = linux.CLD_CONTINUED 429 si.SetStatus(int32(linux.SIGCONT)) 430 default: 431 t.Warningf("waitid got incomprehensible wait status %d", s) 432 } 433 _, err = si.CopyOut(t, infop) 434 return 0, nil, err 435 } 436 437 // SetTidAddress implements linux syscall set_tid_address(2). 438 func SetTidAddress(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 439 addr := args[0].Pointer() 440 441 // Always succeed, return caller's tid. 442 t.SetClearTID(addr) 443 return uintptr(t.ThreadID()), nil, nil 444 } 445 446 // Setns implements linux syscall setns(2). 447 func Setns(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 448 fd := args[0].Int() 449 450 file := t.GetFile(fd) 451 if file == nil { 452 return 0, nil, linuxerr.EBADF 453 } 454 defer file.DecRef(t) 455 456 flags := args[1].Int() 457 return 0, nil, t.Setns(file, flags) 458 } 459 460 // Unshare implements linux syscall unshare(2). 461 func Unshare(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 462 flags := args[0].Int() 463 // "CLONE_NEWPID automatically implies CLONE_THREAD as well." - unshare(2) 464 if flags&linux.CLONE_NEWPID != 0 { 465 flags |= linux.CLONE_THREAD 466 } 467 // "... specifying CLONE_NEWUSER automatically implies CLONE_THREAD. Since 468 // Linux 3.9, CLONE_NEWUSER also automatically implies CLONE_FS." 469 if flags&linux.CLONE_NEWUSER != 0 { 470 flags |= linux.CLONE_THREAD | linux.CLONE_FS 471 } 472 return 0, nil, t.Unshare(flags) 473 } 474 475 // SchedYield implements linux syscall sched_yield(2). 476 func SchedYield(t *kernel.Task, sysno uintptr, _ arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 477 t.Yield() 478 return 0, nil, nil 479 } 480 481 // SchedSetaffinity implements linux syscall sched_setaffinity(2). 482 func SchedSetaffinity(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 483 tid := args[0].Int() 484 size := args[1].SizeT() 485 maskAddr := args[2].Pointer() 486 487 var task *kernel.Task 488 if tid == 0 { 489 task = t 490 } else { 491 task = t.PIDNamespace().TaskWithID(kernel.ThreadID(tid)) 492 if task == nil { 493 return 0, nil, linuxerr.ESRCH 494 } 495 } 496 497 mask := sched.NewCPUSet(t.Kernel().ApplicationCores()) 498 if size > mask.Size() { 499 size = mask.Size() 500 } 501 if _, err := t.CopyInBytes(maskAddr, mask[:size]); err != nil { 502 return 0, nil, err 503 } 504 return 0, nil, task.SetCPUMask(mask) 505 } 506 507 // SchedGetaffinity implements linux syscall sched_getaffinity(2). 508 func SchedGetaffinity(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 509 tid := args[0].Int() 510 size := args[1].SizeT() 511 maskAddr := args[2].Pointer() 512 513 // This limitation is because linux stores the cpumask 514 // in an array of "unsigned long" so the buffer needs to 515 // be a multiple of the word size. 516 if size&(t.Arch().Width()-1) > 0 { 517 return 0, nil, linuxerr.EINVAL 518 } 519 520 var task *kernel.Task 521 if tid == 0 { 522 task = t 523 } else { 524 task = t.PIDNamespace().TaskWithID(kernel.ThreadID(tid)) 525 if task == nil { 526 return 0, nil, linuxerr.ESRCH 527 } 528 } 529 530 mask := task.CPUMask() 531 // The buffer needs to be big enough to hold a cpumask with 532 // all possible cpus. 533 if size < mask.Size() { 534 return 0, nil, linuxerr.EINVAL 535 } 536 _, err := t.CopyOutBytes(maskAddr, mask) 537 538 // NOTE: The syscall interface is slightly different than the glibc 539 // interface. The raw sched_getaffinity syscall returns the number of 540 // bytes used to represent a cpu mask. 541 return uintptr(mask.Size()), nil, err 542 } 543 544 // Getcpu implements linux syscall getcpu(2). 545 func Getcpu(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 546 cpu := args[0].Pointer() 547 node := args[1].Pointer() 548 // third argument to this system call is nowadays unused. 549 550 if cpu != 0 { 551 if _, err := primitive.CopyInt32Out(t, cpu, t.CPU()); err != nil { 552 return 0, nil, err 553 } 554 } 555 // We always return node 0. 556 if node != 0 { 557 if _, err := t.MemoryManager().ZeroOut(t, node, 4, usermem.IOOpts{ 558 AddressSpaceActive: true, 559 }); err != nil { 560 return 0, nil, err 561 } 562 } 563 return 0, nil, nil 564 } 565 566 // Setpgid implements the linux syscall setpgid(2). 567 func Setpgid(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 568 // Note that throughout this function, pgid is interpreted with respect 569 // to t's namespace, not with respect to the selected ThreadGroup's 570 // namespace (which may be different). 571 pid := kernel.ThreadID(args[0].Int()) 572 pgid := kernel.ProcessGroupID(args[1].Int()) 573 574 // "If pid is zero, then the process ID of the calling process is used." 575 tg := t.ThreadGroup() 576 if pid != 0 { 577 ot := t.PIDNamespace().TaskWithID(pid) 578 if ot == nil { 579 return 0, nil, linuxerr.ESRCH 580 } 581 tg = ot.ThreadGroup() 582 if tg.Leader() != ot { 583 return 0, nil, linuxerr.EINVAL 584 } 585 586 // Setpgid only operates on child threadgroups. 587 if tg != t.ThreadGroup() && (tg.Leader().Parent() == nil || tg.Leader().Parent().ThreadGroup() != t.ThreadGroup()) { 588 return 0, nil, linuxerr.ESRCH 589 } 590 } 591 592 // "If pgid is zero, then the PGID of the process specified by pid is made 593 // the same as its process ID." 594 defaultPGID := kernel.ProcessGroupID(t.PIDNamespace().IDOfThreadGroup(tg)) 595 if pgid == 0 { 596 pgid = defaultPGID 597 } else if pgid < 0 { 598 return 0, nil, linuxerr.EINVAL 599 } 600 601 // If the pgid is the same as the group, then create a new one. Otherwise, 602 // we attempt to join an existing process group. 603 if pgid == defaultPGID { 604 // For convenience, errors line up with Linux syscall API. 605 if err := tg.CreateProcessGroup(); err != nil { 606 // Is the process group already as expected? If so, 607 // just return success. This is the same behavior as 608 // Linux. 609 if t.PIDNamespace().IDOfProcessGroup(tg.ProcessGroup()) == defaultPGID { 610 return 0, nil, nil 611 } 612 return 0, nil, err 613 } 614 } else { 615 // Same as CreateProcessGroup, above. 616 if err := tg.JoinProcessGroup(t.PIDNamespace(), pgid, tg != t.ThreadGroup()); err != nil { 617 // See above. 618 if t.PIDNamespace().IDOfProcessGroup(tg.ProcessGroup()) == pgid { 619 return 0, nil, nil 620 } 621 return 0, nil, err 622 } 623 } 624 625 // Success. 626 return 0, nil, nil 627 } 628 629 // Getpgrp implements the linux syscall getpgrp(2). 630 func Getpgrp(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 631 return uintptr(t.PIDNamespace().IDOfProcessGroup(t.ThreadGroup().ProcessGroup())), nil, nil 632 } 633 634 // Getpgid implements the linux syscall getpgid(2). 635 func Getpgid(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 636 tid := kernel.ThreadID(args[0].Int()) 637 if tid == 0 { 638 return Getpgrp(t, sysno, args) 639 } 640 641 target := t.PIDNamespace().TaskWithID(tid) 642 if target == nil { 643 return 0, nil, linuxerr.ESRCH 644 } 645 646 return uintptr(t.PIDNamespace().IDOfProcessGroup(target.ThreadGroup().ProcessGroup())), nil, nil 647 } 648 649 // Setsid implements the linux syscall setsid(2). 650 func Setsid(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 651 return 0, nil, t.ThreadGroup().CreateSession() 652 } 653 654 // Getsid implements the linux syscall getsid(2). 655 func Getsid(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 656 tid := kernel.ThreadID(args[0].Int()) 657 if tid == 0 { 658 return uintptr(t.PIDNamespace().IDOfSession(t.ThreadGroup().Session())), nil, nil 659 } 660 661 target := t.PIDNamespace().TaskWithID(tid) 662 if target == nil { 663 return 0, nil, linuxerr.ESRCH 664 } 665 666 return uintptr(t.PIDNamespace().IDOfSession(target.ThreadGroup().Session())), nil, nil 667 } 668 669 // Getpriority pretends to implement the linux syscall getpriority(2). 670 // 671 // This is a stub; real priorities require a full scheduler. 672 func Getpriority(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 673 which := args[0].Int() 674 who := kernel.ThreadID(args[1].Int()) 675 676 switch which { 677 case linux.PRIO_PROCESS: 678 // Look for who, return ESRCH if not found. 679 var task *kernel.Task 680 if who == 0 { 681 task = t 682 } else { 683 task = t.PIDNamespace().TaskWithID(who) 684 } 685 686 if task == nil { 687 return 0, nil, linuxerr.ESRCH 688 } 689 690 // From kernel/sys.c:getpriority: 691 // "To avoid negative return values, 'getpriority()' 692 // will not return the normal nice-value, but a negated 693 // value that has been offset by 20" 694 return uintptr(20 - task.Niceness()), nil, nil 695 case linux.PRIO_USER: 696 fallthrough 697 case linux.PRIO_PGRP: 698 // PRIO_USER and PRIO_PGRP have no further implementation yet. 699 return 0, nil, nil 700 default: 701 return 0, nil, linuxerr.EINVAL 702 } 703 } 704 705 // Setpriority pretends to implement the linux syscall setpriority(2). 706 // 707 // This is a stub; real priorities require a full scheduler. 708 func Setpriority(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 709 which := args[0].Int() 710 who := kernel.ThreadID(args[1].Int()) 711 niceval := int(args[2].Int()) 712 713 // In the kernel's implementation, values outside the range 714 // of [-20, 19] are truncated to these minimum and maximum 715 // values. 716 if niceval < -20 /* min niceval */ { 717 niceval = -20 718 } else if niceval > 19 /* max niceval */ { 719 niceval = 19 720 } 721 722 switch which { 723 case linux.PRIO_PROCESS: 724 // Look for who, return ESRCH if not found. 725 var task *kernel.Task 726 if who == 0 { 727 task = t 728 } else { 729 task = t.PIDNamespace().TaskWithID(who) 730 } 731 732 if task == nil { 733 return 0, nil, linuxerr.ESRCH 734 } 735 736 task.SetNiceness(niceval) 737 case linux.PRIO_USER: 738 fallthrough 739 case linux.PRIO_PGRP: 740 // PRIO_USER and PRIO_PGRP have no further implementation yet. 741 return 0, nil, nil 742 default: 743 return 0, nil, linuxerr.EINVAL 744 } 745 746 return 0, nil, nil 747 } 748 749 // Ptrace implements linux system call ptrace(2). 750 func Ptrace(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 751 req := args[0].Int64() 752 pid := kernel.ThreadID(args[1].Int()) 753 addr := args[2].Pointer() 754 data := args[3].Pointer() 755 756 return 0, nil, t.Ptrace(req, pid, addr, data) 757 }