github.com/code-reading/golang@v0.0.0-20220303082512-ba5bc0e589a3/go/src/syscall/exec_linux.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:build linux 6 // +build linux 7 8 package syscall 9 10 import ( 11 "internal/itoa" 12 "runtime" 13 "unsafe" 14 ) 15 16 // SysProcIDMap holds Container ID to Host ID mappings used for User Namespaces in Linux. 17 // See user_namespaces(7). 18 type SysProcIDMap struct { 19 ContainerID int // Container ID. 20 HostID int // Host ID. 21 Size int // Size. 22 } 23 24 type SysProcAttr struct { 25 Chroot string // Chroot. 26 Credential *Credential // Credential. 27 // Ptrace tells the child to call ptrace(PTRACE_TRACEME). 28 // Call runtime.LockOSThread before starting a process with this set, 29 // and don't call UnlockOSThread until done with PtraceSyscall calls. 30 Ptrace bool 31 Setsid bool // Create session. 32 // Setpgid sets the process group ID of the child to Pgid, 33 // or, if Pgid == 0, to the new child's process ID. 34 Setpgid bool 35 // Setctty sets the controlling terminal of the child to 36 // file descriptor Ctty. Ctty must be a descriptor number 37 // in the child process: an index into ProcAttr.Files. 38 // This is only meaningful if Setsid is true. 39 Setctty bool 40 Noctty bool // Detach fd 0 from controlling terminal 41 Ctty int // Controlling TTY fd 42 // Foreground places the child process group in the foreground. 43 // This implies Setpgid. The Ctty field must be set to 44 // the descriptor of the controlling TTY. 45 // Unlike Setctty, in this case Ctty must be a descriptor 46 // number in the parent process. 47 Foreground bool 48 Pgid int // Child's process group ID if Setpgid. 49 Pdeathsig Signal // Signal that the process will get when its parent dies (Linux only) 50 Cloneflags uintptr // Flags for clone calls (Linux only) 51 Unshareflags uintptr // Flags for unshare calls (Linux only) 52 UidMappings []SysProcIDMap // User ID mappings for user namespaces. 53 GidMappings []SysProcIDMap // Group ID mappings for user namespaces. 54 // GidMappingsEnableSetgroups enabling setgroups syscall. 55 // If false, then setgroups syscall will be disabled for the child process. 56 // This parameter is no-op if GidMappings == nil. Otherwise for unprivileged 57 // users this should be set to false for mappings work. 58 GidMappingsEnableSetgroups bool 59 AmbientCaps []uintptr // Ambient capabilities (Linux only) 60 } 61 62 var ( 63 none = [...]byte{'n', 'o', 'n', 'e', 0} 64 slash = [...]byte{'/', 0} 65 ) 66 67 // Implemented in runtime package. 68 func runtime_BeforeFork() 69 func runtime_AfterFork() 70 func runtime_AfterForkInChild() 71 72 // Fork, dup fd onto 0..len(fd), and exec(argv0, argvv, envv) in child. 73 // If a dup or exec fails, write the errno error to pipe. 74 // (Pipe is close-on-exec so if exec succeeds, it will be closed.) 75 // In the child, this function must not acquire any locks, because 76 // they might have been locked at the time of the fork. This means 77 // no rescheduling, no malloc calls, and no new stack segments. 78 // For the same reason compiler does not race instrument it. 79 // The calls to RawSyscall are okay because they are assembly 80 // functions that do not grow the stack. 81 //go:norace 82 func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (pid int, err Errno) { 83 // Set up and fork. This returns immediately in the parent or 84 // if there's an error. 85 r1, err1, p, locked := forkAndExecInChild1(argv0, argv, envv, chroot, dir, attr, sys, pipe) 86 if locked { 87 runtime_AfterFork() 88 } 89 if err1 != 0 { 90 return 0, err1 91 } 92 93 // parent; return PID 94 pid = int(r1) 95 96 if sys.UidMappings != nil || sys.GidMappings != nil { 97 Close(p[0]) 98 var err2 Errno 99 // uid/gid mappings will be written after fork and unshare(2) for user 100 // namespaces. 101 if sys.Unshareflags&CLONE_NEWUSER == 0 { 102 if err := writeUidGidMappings(pid, sys); err != nil { 103 err2 = err.(Errno) 104 } 105 } 106 RawSyscall(SYS_WRITE, uintptr(p[1]), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2)) 107 Close(p[1]) 108 } 109 110 return pid, 0 111 } 112 113 const _LINUX_CAPABILITY_VERSION_3 = 0x20080522 114 115 type capHeader struct { 116 version uint32 117 pid int32 118 } 119 120 type capData struct { 121 effective uint32 122 permitted uint32 123 inheritable uint32 124 } 125 type caps struct { 126 hdr capHeader 127 data [2]capData 128 } 129 130 // See CAP_TO_INDEX in linux/capability.h: 131 func capToIndex(cap uintptr) uintptr { return cap >> 5 } 132 133 // See CAP_TO_MASK in linux/capability.h: 134 func capToMask(cap uintptr) uint32 { return 1 << uint(cap&31) } 135 136 // forkAndExecInChild1 implements the body of forkAndExecInChild up to 137 // the parent's post-fork path. This is a separate function so we can 138 // separate the child's and parent's stack frames if we're using 139 // vfork. 140 // 141 // This is go:noinline because the point is to keep the stack frames 142 // of this and forkAndExecInChild separate. 143 // 144 //go:noinline 145 //go:norace 146 func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (r1 uintptr, err1 Errno, p [2]int, locked bool) { 147 // Defined in linux/prctl.h starting with Linux 4.3. 148 const ( 149 PR_CAP_AMBIENT = 0x2f 150 PR_CAP_AMBIENT_RAISE = 0x2 151 ) 152 153 // vfork requires that the child not touch any of the parent's 154 // active stack frames. Hence, the child does all post-fork 155 // processing in this stack frame and never returns, while the 156 // parent returns immediately from this frame and does all 157 // post-fork processing in the outer frame. 158 // Declare all variables at top in case any 159 // declarations require heap allocation (e.g., err1). 160 var ( 161 err2 Errno 162 nextfd int 163 i int 164 caps caps 165 fd1 uintptr 166 puid, psetgroups, pgid []byte 167 uidmap, setgroups, gidmap []byte 168 ) 169 170 if sys.UidMappings != nil { 171 puid = []byte("/proc/self/uid_map\000") 172 uidmap = formatIDMappings(sys.UidMappings) 173 } 174 175 if sys.GidMappings != nil { 176 psetgroups = []byte("/proc/self/setgroups\000") 177 pgid = []byte("/proc/self/gid_map\000") 178 179 if sys.GidMappingsEnableSetgroups { 180 setgroups = []byte("allow\000") 181 } else { 182 setgroups = []byte("deny\000") 183 } 184 gidmap = formatIDMappings(sys.GidMappings) 185 } 186 187 // Record parent PID so child can test if it has died. 188 ppid, _ := rawSyscallNoError(SYS_GETPID, 0, 0, 0) 189 190 // Guard against side effects of shuffling fds below. 191 // Make sure that nextfd is beyond any currently open files so 192 // that we can't run the risk of overwriting any of them. 193 fd := make([]int, len(attr.Files)) 194 nextfd = len(attr.Files) 195 for i, ufd := range attr.Files { 196 if nextfd < int(ufd) { 197 nextfd = int(ufd) 198 } 199 fd[i] = int(ufd) 200 } 201 nextfd++ 202 203 // Allocate another pipe for parent to child communication for 204 // synchronizing writing of User ID/Group ID mappings. 205 if sys.UidMappings != nil || sys.GidMappings != nil { 206 if err := forkExecPipe(p[:]); err != nil { 207 err1 = err.(Errno) 208 return 209 } 210 } 211 212 // About to call fork. 213 // No more allocation or calls of non-assembly functions. 214 runtime_BeforeFork() 215 locked = true 216 switch { 217 case sys.Cloneflags&CLONE_NEWUSER == 0 && sys.Unshareflags&CLONE_NEWUSER == 0: 218 r1, err1 = rawVforkSyscall(SYS_CLONE, uintptr(SIGCHLD|CLONE_VFORK|CLONE_VM)|sys.Cloneflags) 219 case runtime.GOARCH == "s390x": 220 r1, _, err1 = RawSyscall6(SYS_CLONE, 0, uintptr(SIGCHLD)|sys.Cloneflags, 0, 0, 0, 0) 221 default: 222 r1, _, err1 = RawSyscall6(SYS_CLONE, uintptr(SIGCHLD)|sys.Cloneflags, 0, 0, 0, 0, 0) 223 } 224 if err1 != 0 || r1 != 0 { 225 // If we're in the parent, we must return immediately 226 // so we're not in the same stack frame as the child. 227 // This can at most use the return PC, which the child 228 // will not modify, and the results of 229 // rawVforkSyscall, which must have been written after 230 // the child was replaced. 231 return 232 } 233 234 // Fork succeeded, now in child. 235 236 // Enable the "keep capabilities" flag to set ambient capabilities later. 237 if len(sys.AmbientCaps) > 0 { 238 _, _, err1 = RawSyscall6(SYS_PRCTL, PR_SET_KEEPCAPS, 1, 0, 0, 0, 0) 239 if err1 != 0 { 240 goto childerror 241 } 242 } 243 244 // Wait for User ID/Group ID mappings to be written. 245 if sys.UidMappings != nil || sys.GidMappings != nil { 246 if _, _, err1 = RawSyscall(SYS_CLOSE, uintptr(p[1]), 0, 0); err1 != 0 { 247 goto childerror 248 } 249 r1, _, err1 = RawSyscall(SYS_READ, uintptr(p[0]), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2)) 250 if err1 != 0 { 251 goto childerror 252 } 253 if r1 != unsafe.Sizeof(err2) { 254 err1 = EINVAL 255 goto childerror 256 } 257 if err2 != 0 { 258 err1 = err2 259 goto childerror 260 } 261 } 262 263 // Session ID 264 if sys.Setsid { 265 _, _, err1 = RawSyscall(SYS_SETSID, 0, 0, 0) 266 if err1 != 0 { 267 goto childerror 268 } 269 } 270 271 // Set process group 272 if sys.Setpgid || sys.Foreground { 273 // Place child in process group. 274 _, _, err1 = RawSyscall(SYS_SETPGID, 0, uintptr(sys.Pgid), 0) 275 if err1 != 0 { 276 goto childerror 277 } 278 } 279 280 if sys.Foreground { 281 pgrp := int32(sys.Pgid) 282 if pgrp == 0 { 283 r1, _ = rawSyscallNoError(SYS_GETPID, 0, 0, 0) 284 285 pgrp = int32(r1) 286 } 287 288 // Place process group in foreground. 289 _, _, err1 = RawSyscall(SYS_IOCTL, uintptr(sys.Ctty), uintptr(TIOCSPGRP), uintptr(unsafe.Pointer(&pgrp))) 290 if err1 != 0 { 291 goto childerror 292 } 293 } 294 295 // Restore the signal mask. We do this after TIOCSPGRP to avoid 296 // having the kernel send a SIGTTOU signal to the process group. 297 runtime_AfterForkInChild() 298 299 // Unshare 300 if sys.Unshareflags != 0 { 301 _, _, err1 = RawSyscall(SYS_UNSHARE, sys.Unshareflags, 0, 0) 302 if err1 != 0 { 303 goto childerror 304 } 305 306 if sys.Unshareflags&CLONE_NEWUSER != 0 && sys.GidMappings != nil { 307 dirfd := int(_AT_FDCWD) 308 if fd1, _, err1 = RawSyscall6(SYS_OPENAT, uintptr(dirfd), uintptr(unsafe.Pointer(&psetgroups[0])), uintptr(O_WRONLY), 0, 0, 0); err1 != 0 { 309 goto childerror 310 } 311 r1, _, err1 = RawSyscall(SYS_WRITE, uintptr(fd1), uintptr(unsafe.Pointer(&setgroups[0])), uintptr(len(setgroups))) 312 if err1 != 0 { 313 goto childerror 314 } 315 if _, _, err1 = RawSyscall(SYS_CLOSE, uintptr(fd1), 0, 0); err1 != 0 { 316 goto childerror 317 } 318 319 if fd1, _, err1 = RawSyscall6(SYS_OPENAT, uintptr(dirfd), uintptr(unsafe.Pointer(&pgid[0])), uintptr(O_WRONLY), 0, 0, 0); err1 != 0 { 320 goto childerror 321 } 322 r1, _, err1 = RawSyscall(SYS_WRITE, uintptr(fd1), uintptr(unsafe.Pointer(&gidmap[0])), uintptr(len(gidmap))) 323 if err1 != 0 { 324 goto childerror 325 } 326 if _, _, err1 = RawSyscall(SYS_CLOSE, uintptr(fd1), 0, 0); err1 != 0 { 327 goto childerror 328 } 329 } 330 331 if sys.Unshareflags&CLONE_NEWUSER != 0 && sys.UidMappings != nil { 332 dirfd := int(_AT_FDCWD) 333 if fd1, _, err1 = RawSyscall6(SYS_OPENAT, uintptr(dirfd), uintptr(unsafe.Pointer(&puid[0])), uintptr(O_WRONLY), 0, 0, 0); err1 != 0 { 334 goto childerror 335 } 336 r1, _, err1 = RawSyscall(SYS_WRITE, uintptr(fd1), uintptr(unsafe.Pointer(&uidmap[0])), uintptr(len(uidmap))) 337 if err1 != 0 { 338 goto childerror 339 } 340 if _, _, err1 = RawSyscall(SYS_CLOSE, uintptr(fd1), 0, 0); err1 != 0 { 341 goto childerror 342 } 343 } 344 345 // The unshare system call in Linux doesn't unshare mount points 346 // mounted with --shared. Systemd mounts / with --shared. For a 347 // long discussion of the pros and cons of this see debian bug 739593. 348 // The Go model of unsharing is more like Plan 9, where you ask 349 // to unshare and the namespaces are unconditionally unshared. 350 // To make this model work we must further mark / as MS_PRIVATE. 351 // This is what the standard unshare command does. 352 if sys.Unshareflags&CLONE_NEWNS == CLONE_NEWNS { 353 _, _, err1 = RawSyscall6(SYS_MOUNT, uintptr(unsafe.Pointer(&none[0])), uintptr(unsafe.Pointer(&slash[0])), 0, MS_REC|MS_PRIVATE, 0, 0) 354 if err1 != 0 { 355 goto childerror 356 } 357 } 358 } 359 360 // Chroot 361 if chroot != nil { 362 _, _, err1 = RawSyscall(SYS_CHROOT, uintptr(unsafe.Pointer(chroot)), 0, 0) 363 if err1 != 0 { 364 goto childerror 365 } 366 } 367 368 // User and groups 369 if cred := sys.Credential; cred != nil { 370 ngroups := uintptr(len(cred.Groups)) 371 groups := uintptr(0) 372 if ngroups > 0 { 373 groups = uintptr(unsafe.Pointer(&cred.Groups[0])) 374 } 375 if !(sys.GidMappings != nil && !sys.GidMappingsEnableSetgroups && ngroups == 0) && !cred.NoSetGroups { 376 _, _, err1 = RawSyscall(_SYS_setgroups, ngroups, groups, 0) 377 if err1 != 0 { 378 goto childerror 379 } 380 } 381 _, _, err1 = RawSyscall(sys_SETGID, uintptr(cred.Gid), 0, 0) 382 if err1 != 0 { 383 goto childerror 384 } 385 _, _, err1 = RawSyscall(sys_SETUID, uintptr(cred.Uid), 0, 0) 386 if err1 != 0 { 387 goto childerror 388 } 389 } 390 391 if len(sys.AmbientCaps) != 0 { 392 // Ambient capabilities were added in the 4.3 kernel, 393 // so it is safe to always use _LINUX_CAPABILITY_VERSION_3. 394 caps.hdr.version = _LINUX_CAPABILITY_VERSION_3 395 396 if _, _, err1 := RawSyscall(SYS_CAPGET, uintptr(unsafe.Pointer(&caps.hdr)), uintptr(unsafe.Pointer(&caps.data[0])), 0); err1 != 0 { 397 goto childerror 398 } 399 400 for _, c := range sys.AmbientCaps { 401 // Add the c capability to the permitted and inheritable capability mask, 402 // otherwise we will not be able to add it to the ambient capability mask. 403 caps.data[capToIndex(c)].permitted |= capToMask(c) 404 caps.data[capToIndex(c)].inheritable |= capToMask(c) 405 } 406 407 if _, _, err1 := RawSyscall(SYS_CAPSET, uintptr(unsafe.Pointer(&caps.hdr)), uintptr(unsafe.Pointer(&caps.data[0])), 0); err1 != 0 { 408 goto childerror 409 } 410 411 for _, c := range sys.AmbientCaps { 412 _, _, err1 = RawSyscall6(SYS_PRCTL, PR_CAP_AMBIENT, uintptr(PR_CAP_AMBIENT_RAISE), c, 0, 0, 0) 413 if err1 != 0 { 414 goto childerror 415 } 416 } 417 } 418 419 // Chdir 420 if dir != nil { 421 _, _, err1 = RawSyscall(SYS_CHDIR, uintptr(unsafe.Pointer(dir)), 0, 0) 422 if err1 != 0 { 423 goto childerror 424 } 425 } 426 427 // Parent death signal 428 if sys.Pdeathsig != 0 { 429 _, _, err1 = RawSyscall6(SYS_PRCTL, PR_SET_PDEATHSIG, uintptr(sys.Pdeathsig), 0, 0, 0, 0) 430 if err1 != 0 { 431 goto childerror 432 } 433 434 // Signal self if parent is already dead. This might cause a 435 // duplicate signal in rare cases, but it won't matter when 436 // using SIGKILL. 437 r1, _ = rawSyscallNoError(SYS_GETPPID, 0, 0, 0) 438 if r1 != ppid { 439 pid, _ := rawSyscallNoError(SYS_GETPID, 0, 0, 0) 440 _, _, err1 := RawSyscall(SYS_KILL, pid, uintptr(sys.Pdeathsig), 0) 441 if err1 != 0 { 442 goto childerror 443 } 444 } 445 } 446 447 // Pass 1: look for fd[i] < i and move those up above len(fd) 448 // so that pass 2 won't stomp on an fd it needs later. 449 if pipe < nextfd { 450 _, _, err1 = RawSyscall(SYS_DUP3, uintptr(pipe), uintptr(nextfd), O_CLOEXEC) 451 if _SYS_dup != SYS_DUP3 && err1 == ENOSYS { 452 _, _, err1 = RawSyscall(_SYS_dup, uintptr(pipe), uintptr(nextfd), 0) 453 if err1 != 0 { 454 goto childerror 455 } 456 RawSyscall(fcntl64Syscall, uintptr(nextfd), F_SETFD, FD_CLOEXEC) 457 } else if err1 != 0 { 458 goto childerror 459 } 460 pipe = nextfd 461 nextfd++ 462 } 463 for i = 0; i < len(fd); i++ { 464 if fd[i] >= 0 && fd[i] < int(i) { 465 if nextfd == pipe { // don't stomp on pipe 466 nextfd++ 467 } 468 _, _, err1 = RawSyscall(SYS_DUP3, uintptr(fd[i]), uintptr(nextfd), O_CLOEXEC) 469 if _SYS_dup != SYS_DUP3 && err1 == ENOSYS { 470 _, _, err1 = RawSyscall(_SYS_dup, uintptr(fd[i]), uintptr(nextfd), 0) 471 if err1 != 0 { 472 goto childerror 473 } 474 RawSyscall(fcntl64Syscall, uintptr(nextfd), F_SETFD, FD_CLOEXEC) 475 } else if err1 != 0 { 476 goto childerror 477 } 478 fd[i] = nextfd 479 nextfd++ 480 } 481 } 482 483 // Pass 2: dup fd[i] down onto i. 484 for i = 0; i < len(fd); i++ { 485 if fd[i] == -1 { 486 RawSyscall(SYS_CLOSE, uintptr(i), 0, 0) 487 continue 488 } 489 if fd[i] == int(i) { 490 // dup2(i, i) won't clear close-on-exec flag on Linux, 491 // probably not elsewhere either. 492 _, _, err1 = RawSyscall(fcntl64Syscall, uintptr(fd[i]), F_SETFD, 0) 493 if err1 != 0 { 494 goto childerror 495 } 496 continue 497 } 498 // The new fd is created NOT close-on-exec, 499 // which is exactly what we want. 500 _, _, err1 = RawSyscall(_SYS_dup, uintptr(fd[i]), uintptr(i), 0) 501 if err1 != 0 { 502 goto childerror 503 } 504 } 505 506 // By convention, we don't close-on-exec the fds we are 507 // started with, so if len(fd) < 3, close 0, 1, 2 as needed. 508 // Programs that know they inherit fds >= 3 will need 509 // to set them close-on-exec. 510 for i = len(fd); i < 3; i++ { 511 RawSyscall(SYS_CLOSE, uintptr(i), 0, 0) 512 } 513 514 // Detach fd 0 from tty 515 if sys.Noctty { 516 _, _, err1 = RawSyscall(SYS_IOCTL, 0, uintptr(TIOCNOTTY), 0) 517 if err1 != 0 { 518 goto childerror 519 } 520 } 521 522 // Set the controlling TTY to Ctty 523 if sys.Setctty { 524 _, _, err1 = RawSyscall(SYS_IOCTL, uintptr(sys.Ctty), uintptr(TIOCSCTTY), 1) 525 if err1 != 0 { 526 goto childerror 527 } 528 } 529 530 // Enable tracing if requested. 531 // Do this right before exec so that we don't unnecessarily trace the runtime 532 // setting up after the fork. See issue #21428. 533 if sys.Ptrace { 534 _, _, err1 = RawSyscall(SYS_PTRACE, uintptr(PTRACE_TRACEME), 0, 0) 535 if err1 != 0 { 536 goto childerror 537 } 538 } 539 540 // Time to exec. 541 _, _, err1 = RawSyscall(SYS_EXECVE, 542 uintptr(unsafe.Pointer(argv0)), 543 uintptr(unsafe.Pointer(&argv[0])), 544 uintptr(unsafe.Pointer(&envv[0]))) 545 546 childerror: 547 // send error code on pipe 548 RawSyscall(SYS_WRITE, uintptr(pipe), uintptr(unsafe.Pointer(&err1)), unsafe.Sizeof(err1)) 549 for { 550 RawSyscall(SYS_EXIT, 253, 0, 0) 551 } 552 } 553 554 // Try to open a pipe with O_CLOEXEC set on both file descriptors. 555 func forkExecPipe(p []int) (err error) { 556 err = Pipe2(p, O_CLOEXEC) 557 // pipe2 was added in 2.6.27 and our minimum requirement is 2.6.23, so it 558 // might not be implemented. 559 if err == ENOSYS { 560 if err = Pipe(p); err != nil { 561 return 562 } 563 if _, err = fcntl(p[0], F_SETFD, FD_CLOEXEC); err != nil { 564 return 565 } 566 _, err = fcntl(p[1], F_SETFD, FD_CLOEXEC) 567 } 568 return 569 } 570 571 func formatIDMappings(idMap []SysProcIDMap) []byte { 572 var data []byte 573 for _, im := range idMap { 574 data = append(data, []byte(itoa.Itoa(im.ContainerID)+" "+itoa.Itoa(im.HostID)+" "+itoa.Itoa(im.Size)+"\n")...) 575 } 576 return data 577 } 578 579 // writeIDMappings writes the user namespace User ID or Group ID mappings to the specified path. 580 func writeIDMappings(path string, idMap []SysProcIDMap) error { 581 fd, err := Open(path, O_RDWR, 0) 582 if err != nil { 583 return err 584 } 585 586 if _, err := Write(fd, formatIDMappings(idMap)); err != nil { 587 Close(fd) 588 return err 589 } 590 591 if err := Close(fd); err != nil { 592 return err 593 } 594 595 return nil 596 } 597 598 // writeSetgroups writes to /proc/PID/setgroups "deny" if enable is false 599 // and "allow" if enable is true. 600 // This is needed since kernel 3.19, because you can't write gid_map without 601 // disabling setgroups() system call. 602 func writeSetgroups(pid int, enable bool) error { 603 sgf := "/proc/" + itoa.Itoa(pid) + "/setgroups" 604 fd, err := Open(sgf, O_RDWR, 0) 605 if err != nil { 606 return err 607 } 608 609 var data []byte 610 if enable { 611 data = []byte("allow") 612 } else { 613 data = []byte("deny") 614 } 615 616 if _, err := Write(fd, data); err != nil { 617 Close(fd) 618 return err 619 } 620 621 return Close(fd) 622 } 623 624 // writeUidGidMappings writes User ID and Group ID mappings for user namespaces 625 // for a process and it is called from the parent process. 626 func writeUidGidMappings(pid int, sys *SysProcAttr) error { 627 if sys.UidMappings != nil { 628 uidf := "/proc/" + itoa.Itoa(pid) + "/uid_map" 629 if err := writeIDMappings(uidf, sys.UidMappings); err != nil { 630 return err 631 } 632 } 633 634 if sys.GidMappings != nil { 635 // If the kernel is too old to support /proc/PID/setgroups, writeSetGroups will return ENOENT; this is OK. 636 if err := writeSetgroups(pid, sys.GidMappingsEnableSetgroups); err != nil && err != ENOENT { 637 return err 638 } 639 gidf := "/proc/" + itoa.Itoa(pid) + "/gid_map" 640 if err := writeIDMappings(gidf, sys.GidMappings); err != nil { 641 return err 642 } 643 } 644 645 return nil 646 }