github.com/criyle/go-sandbox@v0.10.3/container/environment_linux.go (about) 1 package container 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "os" 8 "os/exec" 9 "sync" 10 "syscall" 11 12 "github.com/criyle/go-sandbox/pkg/forkexec" 13 "github.com/criyle/go-sandbox/pkg/mount" 14 "github.com/criyle/go-sandbox/pkg/unixsocket" 15 "github.com/criyle/go-sandbox/runner" 16 "golang.org/x/sys/unix" 17 ) 18 19 // PathEnv defines path environment variable for the container init process 20 const PathEnv = "PATH=/usr/local/bin:/usr/bin:/bin" 21 22 // Builder builds instance of container environment 23 type Builder struct { 24 // Root is container root mount path, empty uses current work path 25 Root string 26 27 // TmpRoot defines the tmp dir pattern if not nil. Temp directory will be created as container root dir 28 TmpRoot string 29 30 // Mounts defines container mount points, empty uses default mounts 31 Mounts []mount.Mount 32 33 // SymbolicLinks defines symlinks to be created after mount file system 34 SymbolicLinks []SymbolicLink 35 36 // MaskPaths defines paths to be masked to avoid reading information from 37 // outside of the container 38 MaskPaths []string 39 40 // WorkDir defines container default work directory (default: /w) 41 WorkDir string 42 43 // Stderr defines whether to dup container stderr to stderr for debug 44 Stderr io.Writer 45 46 // ExecFile defines executable that called Init, otherwise defer current 47 // executable (/proc/self/exe) 48 ExecFile string 49 50 // CredGenerator defines a credential generator used to create new container 51 CredGenerator CredGenerator 52 53 // Clone flags defines unshare clone flag to create container 54 CloneFlags uintptr 55 56 // HostName set container hostname (default: go-sandbox) 57 HostName string 58 59 // DomainName set container domainname (default: go-sandbox) 60 DomainName string 61 62 // InitCommand defines command that runs after the initialization of the container 63 // to do additional setups (for example, loopback network) 64 InitCommand []string 65 66 // ContainerUID & ContainerGID set the container uid / gid mapping 67 ContainerUID int 68 ContainerGID int 69 } 70 71 // SymbolicLink defines symlinks to be created after mount 72 type SymbolicLink struct { 73 LinkPath string 74 Target string 75 } 76 77 // CredGenerator generates uid / gid credential used by container 78 // to isolate process and file system access 79 type CredGenerator interface { 80 Get() syscall.Credential 81 } 82 83 // Environment holds single progrem containerized environment 84 type Environment interface { 85 Ping() error 86 Open([]OpenCmd) ([]*os.File, error) 87 Delete(p string) error 88 Reset() error 89 Execve(context.Context, ExecveParam) runner.Result 90 Destroy() error 91 } 92 93 // container manages single pre-forked container environment 94 type container struct { 95 process *os.Process // underlying container init pid 96 socket *socket // host - container communication 97 mu sync.Mutex // lock to avoid race condition 98 99 done chan struct{} 100 err error 101 doneOnce sync.Once 102 103 recvCh chan recvReply 104 sendCh chan sendCmd 105 } 106 107 type recvReply struct { 108 Reply reply 109 Msg unixsocket.Msg 110 } 111 112 type sendCmd struct { 113 Cmd cmd 114 Msg unixsocket.Msg 115 } 116 117 // Build creates new environment with underlying container 118 func (b *Builder) Build() (Environment, error) { 119 c, err := b.startContainer() 120 if err != nil { 121 return nil, err 122 } 123 124 // avoid non cinit enabled executable running as container init process 125 if err = c.Ping(); err != nil { 126 c.Destroy() 127 return nil, fmt.Errorf("container: container init not responding to ping %v", err) 128 } 129 130 // container mount points 131 mounts := b.Mounts 132 if len(mounts) == 0 { 133 mounts = mount.NewDefaultBuilder(). 134 WithTmpfs("w", ""). // work dir 135 WithTmpfs("tmp", ""). // tmp 136 FilterNotExist().Mounts 137 } 138 139 // container symbolic links 140 links := b.SymbolicLinks 141 if len(links) == 0 { 142 links = defaultSymLinks 143 } 144 145 maskPaths := b.MaskPaths 146 if len(maskPaths) == 0 { 147 maskPaths = defaultMaskPaths 148 } 149 150 // container root directory on the host 151 root := b.Root 152 if b.TmpRoot != "" { 153 if root, err = os.MkdirTemp(b.Root, b.TmpRoot); err != nil { 154 return nil, fmt.Errorf("container: failed to make tmp container root at %s %v", b.Root, err) 155 } 156 defer os.Remove(root) 157 } 158 if root == "" { 159 if root, err = os.Getwd(); err != nil { 160 return nil, fmt.Errorf("container: failed to get work directory %v", err) 161 } 162 } 163 workDir := containerWD 164 if b.WorkDir != "" { 165 workDir = b.WorkDir 166 } 167 hostName := containerName 168 if b.HostName != "" { 169 hostName = b.HostName 170 } 171 domainName := containerName 172 if b.DomainName != "" { 173 domainName = b.DomainName 174 } 175 176 // set configuration and check if container creation successful 177 if err = c.conf(&containerConfig{ 178 WorkDir: workDir, 179 HostName: hostName, 180 DomainName: domainName, 181 ContainerRoot: root, 182 Mounts: mounts, 183 SymbolicLinks: links, 184 MaskPaths: maskPaths, 185 InitCommand: b.InitCommand, 186 Cred: b.CredGenerator != nil, 187 ContainerUID: b.ContainerUID, 188 ContainerGID: b.ContainerGID, 189 UnshareCgroup: b.CloneFlags&unix.CLONE_NEWCGROUP == unix.CLONE_NEWCGROUP, 190 }); err != nil { 191 c.Destroy() 192 return nil, err 193 } 194 return c, nil 195 } 196 197 func (b *Builder) startContainer() (*container, error) { 198 var ( 199 err error 200 cred syscall.Credential 201 uidMap, gidMap []syscall.SysProcIDMap 202 ) 203 // prepare host <-> container unix socket 204 ins, outs, err := newPassCredSocketPair() 205 if err != nil { 206 return nil, fmt.Errorf("container: failed to create socket: %v", err) 207 } 208 defer outs.Close() 209 210 outf, err := outs.File() 211 if err != nil { 212 ins.Close() 213 return nil, fmt.Errorf("container: failed to dup container socket fd %v", err) 214 } 215 defer outf.Close() 216 217 // prepare container running credential 218 if b.CredGenerator != nil { 219 cred = b.CredGenerator.Get() 220 uidMap, gidMap = b.getIDMapping(&cred) 221 } else { 222 uidMap = []syscall.SysProcIDMap{{HostID: os.Geteuid(), Size: 1}} 223 gidMap = []syscall.SysProcIDMap{{HostID: os.Getegid(), Size: 1}} 224 } 225 226 var cloneFlag uintptr 227 if b.CloneFlags == 0 { 228 cloneFlag = forkexec.UnshareFlags 229 } else { 230 cloneFlag = b.CloneFlags & forkexec.UnshareFlags 231 } 232 233 exe := "/proc/self/exe" 234 if b.ExecFile != "" { 235 exe = b.ExecFile 236 } 237 args := []string{exe, initArg} 238 239 r := exec.Cmd{ 240 Path: exe, 241 Args: args, 242 Env: []string{PathEnv}, 243 Stderr: b.Stderr, 244 ExtraFiles: []*os.File{outf}, 245 SysProcAttr: &syscall.SysProcAttr{ 246 Cloneflags: cloneFlag, 247 UidMappings: uidMap, 248 GidMappings: gidMap, 249 AmbientCaps: []uintptr{ 250 unix.CAP_SYS_ADMIN, 251 unix.CAP_SYS_RESOURCE, 252 }, 253 Pdeathsig: syscall.SIGTERM, 254 }, 255 } 256 if err = r.Start(); err != nil { 257 ins.Close() 258 return nil, fmt.Errorf("container: failed to start container %v", err) 259 } 260 c := &container{ 261 process: r.Process, 262 socket: newSocket(ins), 263 recvCh: make(chan recvReply, 1), 264 sendCh: make(chan sendCmd, 1), 265 done: make(chan struct{}), 266 } 267 go c.sendLoop() 268 go c.recvLoop() 269 270 return c, nil 271 } 272 273 func (c *container) sendLoop() { 274 for { 275 select { 276 case <-c.done: 277 return 278 279 case cmd, ok := <-c.sendCh: 280 if !ok { 281 return 282 } 283 if err := c.socket.SendMsg(cmd.Cmd, cmd.Msg); err != nil { 284 c.socketError(err) 285 return 286 } 287 } 288 } 289 } 290 291 func (c *container) recvLoop() { 292 for { 293 var reply reply 294 msg, err := c.socket.RecvMsg(&reply) 295 if err != nil { 296 c.socketError(err) 297 return 298 } 299 c.recvCh <- recvReply{ 300 Reply: reply, 301 Msg: msg, 302 } 303 } 304 } 305 306 func (c *container) socketError(err error) { 307 c.doneOnce.Do(func() { 308 c.err = err 309 close(c.done) 310 }) 311 } 312 313 // Destroy kill the container process (with its children) 314 // if stderr enabled, collect the output as error 315 func (c *container) Destroy() error { 316 // close socket (abort any ongoing command) 317 c.socket.Close() 318 319 // wait commands terminates 320 c.mu.Lock() 321 defer c.mu.Unlock() 322 323 // kill process 324 c.process.Kill() 325 _, err := c.process.Wait() 326 return err 327 } 328 329 // newPassCredSocketPair creates socket pair and let the first socket to receive credential information 330 func newPassCredSocketPair() (*unixsocket.Socket, *unixsocket.Socket, error) { 331 ins, outs, err := unixsocket.NewSocketPair() 332 if err != nil { 333 return nil, nil, err 334 } 335 if err = ins.SetPassCred(1); err != nil { 336 ins.Close() 337 outs.Close() 338 return nil, nil, err 339 } 340 return ins, outs, nil 341 } 342 343 func (b *Builder) getIDMapping(cred *syscall.Credential) ([]syscall.SysProcIDMap, []syscall.SysProcIDMap) { 344 cUID := b.ContainerUID 345 if cUID == 0 { 346 cUID = containerUID 347 } 348 349 cGID := b.ContainerGID 350 if cGID == 0 { 351 cGID = containerGID 352 } 353 354 uidMap := []syscall.SysProcIDMap{ 355 { 356 ContainerID: 0, 357 HostID: os.Geteuid(), 358 Size: 1, 359 }, 360 { 361 ContainerID: cUID, 362 HostID: int(cred.Uid), 363 Size: 1, 364 }, 365 } 366 367 gidMap := []syscall.SysProcIDMap{ 368 { 369 ContainerID: 0, 370 HostID: os.Getegid(), 371 Size: 1, 372 }, 373 { 374 ContainerID: cGID, 375 HostID: int(cred.Gid), 376 Size: 1, 377 }, 378 } 379 380 return uidMap, gidMap 381 } 382 383 func (c *container) recvAckReply(name string) error { 384 reply, _, err := c.recvReply() 385 if err != nil { 386 return fmt.Errorf("%v: recvAck %v", name, err) 387 } 388 if reply.Error != nil { 389 return fmt.Errorf("%v: container error %v", name, reply.Error) 390 } 391 return nil 392 } 393 func (c *container) recvReply() (reply, unixsocket.Msg, error) { 394 select { 395 case <-c.done: 396 return reply{}, unixsocket.Msg{}, c.err 397 398 case recv := <-c.recvCh: 399 return recv.Reply, recv.Msg, nil 400 } 401 } 402 403 func (c *container) sendCmd(cmd cmd, msg unixsocket.Msg) error { 404 select { 405 case <-c.done: 406 return c.err 407 408 case c.sendCh <- sendCmd{Cmd: cmd, Msg: msg}: 409 return nil 410 } 411 }