github.com/llvm-mirror/llgo@v0.0.0-20190322182713-bf6f0a60fce1/third_party/gofrontend/libgo/go/syscall/exec_linux.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build linux 6 7 package syscall 8 9 import ( 10 "runtime" 11 "unsafe" 12 ) 13 14 //sysnb raw_prctl(option int, arg2 int, arg3 int, arg4 int, arg5 int) (ret int, err Errno) 15 //prctl(option _C_int, arg2 _C_long, arg3 _C_long, arg4 _C_long, arg5 _C_long) _C_int 16 17 // SysProcIDMap holds Container ID to Host ID mappings used for User Namespaces in Linux. 18 // See user_namespaces(7). 19 type SysProcIDMap struct { 20 ContainerID int // Container ID. 21 HostID int // Host ID. 22 Size int // Size. 23 } 24 25 type SysProcAttr struct { 26 Chroot string // Chroot. 27 Credential *Credential // Credential. 28 Ptrace bool // Enable tracing. 29 Setsid bool // Create session. 30 Setpgid bool // Set process group ID to Pgid, or, if Pgid == 0, to new pid. 31 Setctty bool // Set controlling terminal to fd Ctty (only meaningful if Setsid is set) 32 Noctty bool // Detach fd 0 from controlling terminal 33 Ctty int // Controlling TTY fd 34 Foreground bool // Place child's process group in foreground. (Implies Setpgid. Uses Ctty as fd of controlling TTY) 35 Pgid int // Child's process group ID if Setpgid. 36 Pdeathsig Signal // Signal that the process will get when its parent dies (Linux only) 37 Cloneflags uintptr // Flags for clone calls (Linux only) 38 UidMappings []SysProcIDMap // User ID mappings for user namespaces. 39 GidMappings []SysProcIDMap // Group ID mappings for user namespaces. 40 // GidMappingsEnableSetgroups enabling setgroups syscall. 41 // If false, then setgroups syscall will be disabled for the child process. 42 // This parameter is no-op if GidMappings == nil. Otherwise for unprivileged 43 // users this should be set to false for mappings work. 44 GidMappingsEnableSetgroups bool 45 } 46 47 // Implemented in runtime package. 48 func runtime_BeforeFork() 49 func runtime_AfterFork() 50 51 // Fork, dup fd onto 0..len(fd), and exec(argv0, argvv, envv) in child. 52 // If a dup or exec fails, write the errno error to pipe. 53 // (Pipe is close-on-exec so if exec succeeds, it will be closed.) 54 // In the child, this function must not acquire any locks, because 55 // they might have been locked at the time of the fork. This means 56 // no rescheduling, no malloc calls, and no new stack segments. 57 // For the same reason compiler does not race instrument it. 58 // The calls to RawSyscall are okay because they are assembly 59 // functions that do not grow the stack. 60 func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (pid int, err Errno) { 61 // Declare all variables at top in case any 62 // declarations require heap allocation (e.g., err1). 63 var ( 64 r1 uintptr 65 err1 Errno 66 err2 Errno 67 nextfd int 68 i int 69 p [2]int 70 ) 71 72 // Record parent PID so child can test if it has died. 73 ppid := raw_getpid() 74 75 // Guard against side effects of shuffling fds below. 76 // Make sure that nextfd is beyond any currently open files so 77 // that we can't run the risk of overwriting any of them. 78 fd := make([]int, len(attr.Files)) 79 nextfd = len(attr.Files) 80 for i, ufd := range attr.Files { 81 if nextfd < int(ufd) { 82 nextfd = int(ufd) 83 } 84 fd[i] = int(ufd) 85 } 86 nextfd++ 87 88 // Allocate another pipe for parent to child communication for 89 // synchronizing writing of User ID/Group ID mappings. 90 if sys.UidMappings != nil || sys.GidMappings != nil { 91 if err := forkExecPipe(p[:]); err != nil { 92 return 0, err.(Errno) 93 } 94 } 95 96 // About to call fork. 97 // No more allocation or calls of non-assembly functions. 98 runtime_BeforeFork() 99 if runtime.GOARCH == "s390x" || runtime.GOARCH == "s390" { 100 r1, _, err1 = RawSyscall6(SYS_CLONE, 0, uintptr(SIGCHLD)|sys.Cloneflags, 0, 0, 0, 0) 101 } else { 102 r1, _, err1 = RawSyscall6(SYS_CLONE, uintptr(SIGCHLD)|sys.Cloneflags, 0, 0, 0, 0, 0) 103 } 104 if err1 != 0 { 105 runtime_AfterFork() 106 return 0, err1 107 } 108 109 if r1 != 0 { 110 // parent; return PID 111 runtime_AfterFork() 112 pid = int(r1) 113 114 if sys.UidMappings != nil || sys.GidMappings != nil { 115 Close(p[0]) 116 err := writeUidGidMappings(pid, sys) 117 if err != nil { 118 err2 = err.(Errno) 119 } 120 RawSyscall(SYS_WRITE, uintptr(p[1]), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2)) 121 Close(p[1]) 122 } 123 124 return pid, 0 125 } 126 127 // Fork succeeded, now in child. 128 129 // Wait for User ID/Group ID mappings to be written. 130 if sys.UidMappings != nil || sys.GidMappings != nil { 131 if _, _, err1 = RawSyscall(SYS_CLOSE, uintptr(p[1]), 0, 0); err1 != 0 { 132 goto childerror 133 } 134 r1, _, err1 = RawSyscall(SYS_READ, uintptr(p[0]), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2)) 135 if err1 != 0 { 136 goto childerror 137 } 138 if r1 != unsafe.Sizeof(err2) { 139 err1 = EINVAL 140 goto childerror 141 } 142 if err2 != 0 { 143 err1 = err2 144 goto childerror 145 } 146 } 147 148 // Enable tracing if requested. 149 if sys.Ptrace { 150 err1 = raw_ptrace(_PTRACE_TRACEME, 0, nil, nil) 151 if err1 != 0 { 152 goto childerror 153 } 154 } 155 156 // Session ID 157 if sys.Setsid { 158 err1 = raw_setsid() 159 if err1 != 0 { 160 goto childerror 161 } 162 } 163 164 // Set process group 165 if sys.Setpgid || sys.Foreground { 166 // Place child in process group. 167 err1 = raw_setpgid(0, sys.Pgid) 168 if err1 != 0 { 169 goto childerror 170 } 171 } 172 173 if sys.Foreground { 174 pgrp := Pid_t(sys.Pgid) 175 if pgrp == 0 { 176 pgrp = raw_getpid() 177 } 178 179 // Place process group in foreground. 180 _, err1 = raw_ioctl_ptr(sys.Ctty, TIOCSPGRP, unsafe.Pointer(&pgrp)) 181 if err1 != 0 { 182 goto childerror 183 } 184 } 185 186 // Chroot 187 if chroot != nil { 188 err1 = raw_chroot(chroot) 189 if err1 != 0 { 190 goto childerror 191 } 192 } 193 194 // User and groups 195 if cred := sys.Credential; cred != nil { 196 ngroups := len(cred.Groups) 197 if ngroups > 0 { 198 groups := unsafe.Pointer(&cred.Groups[0]) 199 err1 = raw_setgroups(ngroups, groups) 200 if err1 != 0 { 201 goto childerror 202 } 203 } 204 err1 = raw_setgid(int(cred.Gid)) 205 if err1 != 0 { 206 goto childerror 207 } 208 err1 = raw_setuid(int(cred.Uid)) 209 if err1 != 0 { 210 goto childerror 211 } 212 } 213 214 // Chdir 215 if dir != nil { 216 err1 = raw_chdir(dir) 217 if err1 != 0 { 218 goto childerror 219 } 220 } 221 222 // Parent death signal 223 if sys.Pdeathsig != 0 { 224 _, err1 = raw_prctl(PR_SET_PDEATHSIG, int(sys.Pdeathsig), 0, 0, 0) 225 if err1 != 0 { 226 goto childerror 227 } 228 229 // Signal self if parent is already dead. This might cause a 230 // duplicate signal in rare cases, but it won't matter when 231 // using SIGKILL. 232 r1 := raw_getppid() 233 if r1 != ppid { 234 pid := raw_getpid() 235 err1 = raw_kill(pid, sys.Pdeathsig) 236 if err1 != 0 { 237 goto childerror 238 } 239 } 240 } 241 242 // Pass 1: look for fd[i] < i and move those up above len(fd) 243 // so that pass 2 won't stomp on an fd it needs later. 244 if pipe < nextfd { 245 err1 = raw_dup2(pipe, nextfd) 246 if err1 != 0 { 247 goto childerror 248 } 249 raw_fcntl(nextfd, F_SETFD, FD_CLOEXEC) 250 pipe = nextfd 251 nextfd++ 252 } 253 for i = 0; i < len(fd); i++ { 254 if fd[i] >= 0 && fd[i] < int(i) { 255 err1 = raw_dup2(fd[i], nextfd) 256 if err1 != 0 { 257 goto childerror 258 } 259 raw_fcntl(nextfd, F_SETFD, FD_CLOEXEC) 260 fd[i] = nextfd 261 nextfd++ 262 if nextfd == pipe { // don't stomp on pipe 263 nextfd++ 264 } 265 } 266 } 267 268 // Pass 2: dup fd[i] down onto i. 269 for i = 0; i < len(fd); i++ { 270 if fd[i] == -1 { 271 raw_close(i) 272 continue 273 } 274 if fd[i] == int(i) { 275 // dup2(i, i) won't clear close-on-exec flag on Linux, 276 // probably not elsewhere either. 277 _, err1 = raw_fcntl(fd[i], F_SETFD, 0) 278 if err1 != 0 { 279 goto childerror 280 } 281 continue 282 } 283 // The new fd is created NOT close-on-exec, 284 // which is exactly what we want. 285 err1 = raw_dup2(fd[i], i) 286 if err1 != 0 { 287 goto childerror 288 } 289 } 290 291 // By convention, we don't close-on-exec the fds we are 292 // started with, so if len(fd) < 3, close 0, 1, 2 as needed. 293 // Programs that know they inherit fds >= 3 will need 294 // to set them close-on-exec. 295 for i = len(fd); i < 3; i++ { 296 raw_close(i) 297 } 298 299 // Detach fd 0 from tty 300 if sys.Noctty { 301 _, err1 = raw_ioctl(0, TIOCNOTTY, 0) 302 if err1 != 0 { 303 goto childerror 304 } 305 } 306 307 // Set the controlling TTY to Ctty 308 if sys.Setctty { 309 _, err1 = raw_ioctl(sys.Ctty, TIOCSCTTY, 0) 310 if err1 != 0 { 311 goto childerror 312 } 313 } 314 315 // Time to exec. 316 err1 = raw_execve(argv0, &argv[0], &envv[0]) 317 318 childerror: 319 // send error code on pipe 320 raw_write(pipe, (*byte)(unsafe.Pointer(&err1)), int(unsafe.Sizeof(err1))) 321 for { 322 raw_exit(253) 323 } 324 } 325 326 // Try to open a pipe with O_CLOEXEC set on both file descriptors. 327 func forkExecPipe(p []int) (err error) { 328 err = Pipe2(p, O_CLOEXEC) 329 // pipe2 was added in 2.6.27 and our minimum requirement is 2.6.23, so it 330 // might not be implemented. 331 if err == ENOSYS { 332 if err = Pipe(p); err != nil { 333 return 334 } 335 if _, err = fcntl(p[0], F_SETFD, FD_CLOEXEC); err != nil { 336 return 337 } 338 _, err = fcntl(p[1], F_SETFD, FD_CLOEXEC) 339 } 340 return 341 } 342 343 // writeIDMappings writes the user namespace User ID or Group ID mappings to the specified path. 344 func writeIDMappings(path string, idMap []SysProcIDMap) error { 345 fd, err := Open(path, O_RDWR, 0) 346 if err != nil { 347 return err 348 } 349 350 data := "" 351 for _, im := range idMap { 352 data = data + itoa(im.ContainerID) + " " + itoa(im.HostID) + " " + itoa(im.Size) + "\n" 353 } 354 355 bytes, err := ByteSliceFromString(data) 356 if err != nil { 357 Close(fd) 358 return err 359 } 360 361 if _, err := Write(fd, bytes); err != nil { 362 Close(fd) 363 return err 364 } 365 366 if err := Close(fd); err != nil { 367 return err 368 } 369 370 return nil 371 } 372 373 // writeSetgroups writes to /proc/PID/setgroups "deny" if enable is false 374 // and "allow" if enable is true. 375 // This is needed since kernel 3.19, because you can't write gid_map without 376 // disabling setgroups() system call. 377 func writeSetgroups(pid int, enable bool) error { 378 sgf := "/proc/" + itoa(pid) + "/setgroups" 379 fd, err := Open(sgf, O_RDWR, 0) 380 if err != nil { 381 return err 382 } 383 384 var data []byte 385 if enable { 386 data = []byte("allow") 387 } else { 388 data = []byte("deny") 389 } 390 391 if _, err := Write(fd, data); err != nil { 392 Close(fd) 393 return err 394 } 395 396 return Close(fd) 397 } 398 399 // writeUidGidMappings writes User ID and Group ID mappings for user namespaces 400 // for a process and it is called from the parent process. 401 func writeUidGidMappings(pid int, sys *SysProcAttr) error { 402 if sys.UidMappings != nil { 403 uidf := "/proc/" + itoa(pid) + "/uid_map" 404 if err := writeIDMappings(uidf, sys.UidMappings); err != nil { 405 return err 406 } 407 } 408 409 if sys.GidMappings != nil { 410 // If the kernel is too old to support /proc/PID/setgroups, writeSetGroups will return ENOENT; this is OK. 411 if err := writeSetgroups(pid, sys.GidMappingsEnableSetgroups); err != nil && err != ENOENT { 412 return err 413 } 414 gidf := "/proc/" + itoa(pid) + "/gid_map" 415 if err := writeIDMappings(gidf, sys.GidMappings); err != nil { 416 return err 417 } 418 } 419 420 return nil 421 }