github.com/criyle/go-sandbox@v0.10.3/pkg/forkexec/fork_linux.go (about) 1 package forkexec 2 3 import ( 4 "syscall" 5 "unsafe" // required for go:linkname. 6 7 "golang.org/x/sys/unix" 8 ) 9 10 // Start will fork, load seccomp and execve and being traced by ptrace 11 // Return pid and potential error 12 // The runtime OS thread must be locked before calling this function 13 // if ptrace is set to true 14 func (r *Runner) Start() (int, error) { 15 argv0, argv, env, err := prepareExec(r.Args, r.Env) 16 if err != nil { 17 return 0, err 18 } 19 20 // prepare work dir 21 workdir, err := syscallStringFromString(r.WorkDir) 22 if err != nil { 23 return 0, err 24 } 25 26 // prepare hostname 27 hostname, err := syscallStringFromString(r.HostName) 28 if err != nil { 29 return 0, err 30 } 31 32 // prepare domainname 33 domainname, err := syscallStringFromString(r.DomainName) 34 if err != nil { 35 return 0, err 36 } 37 38 // prepare pivot_root param 39 pivotRoot, err := syscallStringFromString(r.PivotRoot) 40 if err != nil { 41 return 0, err 42 } 43 44 // socketpair p used to notify child the uid / gid mapping have been setup 45 // socketpair p is also used to sync with parent before final execve 46 // p[0] is used by parent and p[1] is used by child 47 p, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) 48 if err != nil { 49 return 0, err 50 } 51 52 // fork in child 53 pid, err1 := forkAndExecInChild(r, argv0, argv, env, workdir, hostname, domainname, pivotRoot, p) 54 55 // restore all signals 56 afterFork() 57 syscall.ForkLock.Unlock() 58 59 return syncWithChild(r, p, int(pid), err1) 60 } 61 62 func syncWithChild(r *Runner, p [2]int, pid int, err1 syscall.Errno) (int, error) { 63 var ( 64 err2 syscall.Errno 65 err error 66 unshareUser = r.CloneFlags&unix.CLONE_NEWUSER == unix.CLONE_NEWUSER 67 childErr ChildError 68 ) 69 70 // sync with child 71 unix.Close(p[1]) 72 73 // clone syscall failed 74 if err1 != 0 { 75 unix.Close(p[0]) 76 
childErr.Location = LocClone 77 childErr.Err = err1 78 return 0, childErr 79 } 80 81 // synchronize with child for uid / gid map 82 if unshareUser { 83 if err = writeIDMaps(r, int(pid)); err != nil { 84 err2 = err.(syscall.Errno) 85 } 86 syscall.RawSyscall(syscall.SYS_WRITE, uintptr(p[0]), uintptr(unsafe.Pointer(&err2)), uintptr(unsafe.Sizeof(err2))) 87 } 88 89 n, err := readChildErr(p[0], &childErr) 90 // child returned error code 91 if (n != int(unsafe.Sizeof(err2)) && n != int(unsafe.Sizeof(childErr))) || childErr.Err != 0 || err != nil { 92 childErr.Err = handlePipeError(n, childErr.Err) 93 goto fail 94 } 95 96 // if syncfunc return error, then fail child immediately 97 if r.SyncFunc != nil { 98 if err = r.SyncFunc(int(pid)); err != nil { 99 goto fail 100 } 101 } 102 // otherwise, ack child (err1 == 0) 103 syscall.RawSyscall(syscall.SYS_WRITE, uintptr(p[0]), uintptr(unsafe.Pointer(&err1)), uintptr(unsafe.Sizeof(err1))) 104 105 // if stopped before execve by signal SIGSTOP or PTRACE_ME, then do not wait until execve 106 if r.Ptrace || r.StopBeforeSeccomp { 107 // let's wait it in another goroutine to avoid SIGPIPE 108 go func() { 109 readChildErr(p[0], &childErr) 110 unix.Close(p[0]) 111 }() 112 return int(pid), nil 113 } 114 115 // if read anything mean child failed after sync (close_on_exec so it should not block) 116 n, err = readChildErr(p[0], &childErr) 117 unix.Close(p[0]) 118 if n != 0 || err != nil { 119 childErr.Err = handlePipeError(n, childErr.Err) 120 goto failAfterClose 121 } 122 return int(pid), nil 123 124 fail: 125 unix.Close(p[0]) 126 127 failAfterClose: 128 handleChildFailed(int(pid)) 129 if childErr.Err == 0 { 130 return 0, err 131 } 132 return 0, childErr 133 } 134 135 func readChildErr(fd int, childErr *ChildError) (n int, err error) { 136 for { 137 n, err = readlen(fd, (*byte)(unsafe.Pointer(childErr)), int(unsafe.Sizeof(*childErr))) 138 if err != syscall.EINTR { 139 break 140 } 141 } 142 return 143 } 144 145 // 
// readlen performs a single read(2) of at most np bytes from fd into the
// memory starting at p. It returns the raw syscall result as n and, when the
// syscall reports a non-zero errno, that errno as err.
//
// See https://cs.opensource.google/go/go/+/refs/tags/go1.18.1:src/syscall/zsyscall_linux_amd64.go;l=944
func readlen(fd int, p *byte, np int) (n int, err error) {
	got, _, errno := syscall.Syscall(syscall.SYS_READ, uintptr(fd), uintptr(unsafe.Pointer(p)), uintptr(np))
	if errno != 0 {
		return int(got), errno
	}
	return int(got), nil
}

// handlePipeError validates the result of reading an errno from the sync pipe.
// If fewer bytes than one errno were read (r1), the value is incomplete and
// EPIPE is returned; otherwise errno is returned unchanged.
func handlePipeError(r1 int, errno syscall.Errno) syscall.Errno {
	// short read: the errno value cannot be trusted
	if uintptr(r1) < unsafe.Sizeof(errno) {
		return syscall.EPIPE
	}
	return errno
}

// handleChildFailed kills the child identified by pid and reaps it.
func handleChildFailed(pid int) {
	var status syscall.WaitStatus

	// kill first so that the wait below is not blocked by a running child
	syscall.Kill(pid, syscall.SIGKILL)

	// reap the child so zombies do not accumulate; retry only when the wait
	// was interrupted
	for {
		if _, err := syscall.Wait4(pid, &status, 0, nil); err != syscall.EINTR {
			return
		}
	}
}