github.com/zhuohuang-hust/src-cbuild@v0.0.0-20230105071821-c7aab3e7c840/mergeCode/runc/libcontainer/process_linux.go (about) 1 // +build linux 2 3 package libcontainer 4 5 import ( 6 "encoding/json" 7 "errors" 8 "fmt" 9 "io" 10 "os" 11 "os/exec" 12 "path/filepath" 13 "strconv" 14 "syscall" 15 16 "github.com/opencontainers/runc/libcontainer/cgroups" 17 "github.com/opencontainers/runc/libcontainer/configs" 18 "github.com/opencontainers/runc/libcontainer/system" 19 "github.com/opencontainers/runc/libcontainer/utils" 20 ) 21 22 type parentProcess interface { 23 // pid returns the pid for the running process. 24 pid() int 25 26 // start starts the process execution. 27 start() error 28 29 // send a SIGKILL to the process and wait for the exit. 30 terminate() error 31 32 // wait waits on the process returning the process state. 33 wait() (*os.ProcessState, error) 34 35 // startTime returns the process start time. 36 startTime() (string, error) 37 38 signal(os.Signal) error 39 40 externalDescriptors() []string 41 42 setExternalDescriptors(fds []string) 43 } 44 45 type setnsProcess struct { 46 cmd *exec.Cmd 47 parentPipe *os.File 48 childPipe *os.File 49 cgroupPaths map[string]string 50 config *initConfig 51 fds []string 52 process *Process 53 bootstrapData io.Reader 54 rootDir *os.File 55 } 56 57 func (p *setnsProcess) startTime() (string, error) { 58 return system.GetProcessStartTime(p.pid()) 59 } 60 61 func (p *setnsProcess) signal(sig os.Signal) error { 62 s, ok := sig.(syscall.Signal) 63 if !ok { 64 return errors.New("os: unsupported signal type") 65 } 66 return syscall.Kill(p.pid(), s) 67 } 68 69 func (p *setnsProcess) start() (err error) { 70 defer p.parentPipe.Close() 71 err = p.cmd.Start() 72 p.childPipe.Close() 73 p.rootDir.Close() 74 if err != nil { 75 return newSystemErrorWithCause(err, "starting setns process") 76 } 77 if p.bootstrapData != nil { 78 if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil { 79 return newSystemErrorWithCause(err, "copying bootstrap data to pipe") 80 } 81 } 82 if err = p.execSetns(); err != nil { 83 return newSystemErrorWithCause(err, "executing setns process") 84 } 85 if len(p.cgroupPaths) > 0 { 86 if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil { 87 return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid()) 88 } 89 } 90 // set oom_score_adj 91 if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil { 92 return newSystemErrorWithCause(err, "setting oom score") 93 } 94 // set rlimits, this has to be done here because we lose permissions 95 // to raise the limits once we enter a user-namespace 96 if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { 97 return newSystemErrorWithCause(err, "setting rlimits for process") 98 } 99 if err := utils.WriteJSON(p.parentPipe, p.config); err != nil { 100 return newSystemErrorWithCause(err, "writing config to pipe") 101 } 102 103 if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil { 104 return newSystemErrorWithCause(err, "calling shutdown on init pipe") 105 } 106 // wait for the child process to fully complete and receive an error message 107 // if one was encoutered 108 var ierr *genericError 109 if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF { 110 return newSystemErrorWithCause(err, "decoding init error from pipe") 111 } 112 // Must be done after Shutdown so the child will exit and we can wait for it. 113 if ierr != nil { 114 p.wait() 115 return ierr 116 } 117 return nil 118 } 119 120 // execSetns runs the process that executes C code to perform the setns calls 121 // because setns support requires the C process to fork off a child and perform the setns 122 // before the go runtime boots, we wait on the process to die and receive the child's pid 123 // over the provided pipe. 124 func (p *setnsProcess) execSetns() error { 125 status, err := p.cmd.Process.Wait() 126 if err != nil { 127 p.cmd.Wait() 128 return newSystemErrorWithCause(err, "waiting on setns process to finish") 129 } 130 if !status.Success() { 131 p.cmd.Wait() 132 return newSystemError(&exec.ExitError{ProcessState: status}) 133 } 134 var pid *pid 135 if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil { 136 p.cmd.Wait() 137 return newSystemErrorWithCause(err, "reading pid from init pipe") 138 } 139 process, err := os.FindProcess(pid.Pid) 140 if err != nil { 141 return err 142 } 143 p.cmd.Process = process 144 p.process.ops = p 145 return nil 146 } 147 148 // terminate sends a SIGKILL to the forked process for the setns routine then waits to 149 // avoid the process becoming a zombie. 150 func (p *setnsProcess) terminate() error { 151 if p.cmd.Process == nil { 152 return nil 153 } 154 err := p.cmd.Process.Kill() 155 if _, werr := p.wait(); err == nil { 156 err = werr 157 } 158 return err 159 } 160 161 func (p *setnsProcess) wait() (*os.ProcessState, error) { 162 err := p.cmd.Wait() 163 164 // Return actual ProcessState even on Wait error 165 return p.cmd.ProcessState, err 166 } 167 168 func (p *setnsProcess) pid() int { 169 return p.cmd.Process.Pid 170 } 171 172 func (p *setnsProcess) externalDescriptors() []string { 173 return p.fds 174 } 175 176 func (p *setnsProcess) setExternalDescriptors(newFds []string) { 177 p.fds = newFds 178 } 179 180 type initProcess struct { 181 cmd *exec.Cmd 182 parentPipe *os.File 183 childPipe *os.File 184 config *initConfig 185 manager cgroups.Manager 186 container *linuxContainer 187 fds []string 188 process *Process 189 bootstrapData io.Reader 190 sharePidns bool 191 rootDir *os.File 192 } 193 194 func (p *initProcess) pid() int { 195 return p.cmd.Process.Pid 196 } 197 198 func (p *initProcess) externalDescriptors() []string { 199 return p.fds 200 } 201 202 // execSetns runs the process that executes C code to perform the setns calls 203 // because setns support requires the C process to fork off a child and perform the setns 204 // before the go runtime boots, we wait on the process to die and receive the child's pid 205 // over the provided pipe. 206 // This is called by initProcess.start function 207 func (p *initProcess) execSetns() error { 208 status, err := p.cmd.Process.Wait() 209 if err != nil { 210 p.cmd.Wait() 211 return err 212 } 213 if !status.Success() { 214 p.cmd.Wait() 215 return &exec.ExitError{ProcessState: status} 216 } 217 var pid *pid 218 if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil { 219 p.cmd.Wait() 220 return err 221 } 222 process, err := os.FindProcess(pid.Pid) 223 if err != nil { 224 return err 225 } 226 p.cmd.Process = process 227 p.process.ops = p 228 return nil 229 } 230 231 func (p *initProcess) start() error { 232 defer p.parentPipe.Close() 233 err := p.cmd.Start() 234 p.process.ops = p 235 p.childPipe.Close() 236 p.rootDir.Close() 237 if err != nil { 238 p.process.ops = nil 239 return newSystemErrorWithCause(err, "starting init process command") 240 } 241 if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil { 242 return err 243 } 244 if err := p.execSetns(); err != nil { 245 return newSystemErrorWithCause(err, "running exec setns process for init") 246 } 247 // Save the standard descriptor names before the container process 248 // can potentially move them (e.g., via dup2()). If we don't do this now, 249 // we won't know at checkpoint time which file descriptor to look up. 250 fds, err := getPipeFds(p.pid()) 251 if err != nil { 252 return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid()) 253 } 254 p.setExternalDescriptors(fds) 255 // Do this before syncing with child so that no children 256 // can escape the cgroup 257 if err := p.manager.Apply(p.pid()); err != nil { 258 return newSystemErrorWithCause(err, "applying cgroup configuration for process") 259 } 260 defer func() { 261 if err != nil { 262 // TODO: should not be the responsibility to call here 263 p.manager.Destroy() 264 } 265 }() 266 if err := p.createNetworkInterfaces(); err != nil { 267 return newSystemErrorWithCause(err, "creating network interfaces") 268 } 269 if err := p.sendConfig(); err != nil { 270 return newSystemErrorWithCause(err, "sending config to init process") 271 } 272 var ( 273 procSync syncT 274 sentRun bool 275 sentResume bool 276 ierr *genericError 277 ) 278 279 dec := json.NewDecoder(p.parentPipe) 280 loop: 281 for { 282 if err := dec.Decode(&procSync); err != nil { 283 if err == io.EOF { 284 break loop 285 } 286 return newSystemErrorWithCause(err, "decoding sync type from init pipe") 287 } 288 switch procSync.Type { 289 case procReady: 290 if err := p.manager.Set(p.config.Config); err != nil { 291 return newSystemErrorWithCause(err, "setting cgroup config for ready process") 292 } 293 // set oom_score_adj 294 if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil { 295 return newSystemErrorWithCause(err, "setting oom score for ready process") 296 } 297 // set rlimits, this has to be done here because we lose permissions 298 // to raise the limits once we enter a user-namespace 299 if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { 300 return newSystemErrorWithCause(err, "setting rlimits for ready process") 301 } 302 // call prestart hooks 303 if !p.config.Config.Namespaces.Contains(configs.NEWNS) { 304 if p.config.Config.Hooks != nil { 305 s := configs.HookState{ 306 Version: p.container.config.Version, 307 ID: p.container.id, 308 Pid: p.pid(), 309 Root: p.config.Config.Rootfs, 310 } 311 for i, hook := range p.config.Config.Hooks.Prestart { 312 if err := hook.Run(s); err != nil { 313 return newSystemErrorWithCausef(err, "running prestart hook %d", i) 314 } 315 } 316 } 317 } 318 // Sync with child. 319 if err := utils.WriteJSON(p.parentPipe, syncT{procRun}); err != nil { 320 return newSystemErrorWithCause(err, "writing syncT run type") 321 } 322 sentRun = true 323 case procHooks: 324 if p.config.Config.Hooks != nil { 325 s := configs.HookState{ 326 Version: p.container.config.Version, 327 ID: p.container.id, 328 Pid: p.pid(), 329 Root: p.config.Config.Rootfs, 330 BundlePath: utils.SearchLabels(p.config.Config.Labels, "bundle"), 331 } 332 for i, hook := range p.config.Config.Hooks.Prestart { 333 if err := hook.Run(s); err != nil { 334 return newSystemErrorWithCausef(err, "running prestart hook %d", i) 335 } 336 } 337 } 338 // Sync with child. 339 if err := utils.WriteJSON(p.parentPipe, syncT{procResume}); err != nil { 340 return newSystemErrorWithCause(err, "writing syncT resume type") 341 } 342 sentResume = true 343 case procError: 344 // wait for the child process to fully complete and receive an error message 345 // if one was encoutered 346 if err := dec.Decode(&ierr); err != nil && err != io.EOF { 347 return newSystemErrorWithCause(err, "decoding proc error from init") 348 } 349 if ierr != nil { 350 break loop 351 } 352 // Programmer error. 353 panic("No error following JSON procError payload.") 354 default: 355 return newSystemError(fmt.Errorf("invalid JSON payload from child")) 356 } 357 } 358 if !sentRun { 359 return newSystemErrorWithCause(ierr, "container init") 360 } 361 if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume { 362 return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process")) 363 } 364 if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil { 365 return newSystemErrorWithCause(err, "shutting down init pipe") 366 } 367 // Must be done after Shutdown so the child will exit and we can wait for it. 368 if ierr != nil { 369 p.wait() 370 return ierr 371 } 372 return nil 373 } 374 375 func (p *initProcess) wait() (*os.ProcessState, error) { 376 err := p.cmd.Wait() 377 if err != nil { 378 return p.cmd.ProcessState, err 379 } 380 // we should kill all processes in cgroup when init is died if we use host PID namespace 381 if p.sharePidns { 382 signalAllProcesses(p.manager, syscall.SIGKILL) 383 } 384 return p.cmd.ProcessState, nil 385 } 386 387 func (p *initProcess) terminate() error { 388 if p.cmd.Process == nil { 389 return nil 390 } 391 err := p.cmd.Process.Kill() 392 if _, werr := p.wait(); err == nil { 393 err = werr 394 } 395 return err 396 } 397 398 func (p *initProcess) startTime() (string, error) { 399 return system.GetProcessStartTime(p.pid()) 400 } 401 402 func (p *initProcess) sendConfig() error { 403 // send the config to the container's init process, we don't use JSON Encode 404 // here because there might be a problem in JSON decoder in some cases, see: 405 // https://github.com/docker/docker/issues/14203#issuecomment-174177790 406 return utils.WriteJSON(p.parentPipe, p.config) 407 } 408 409 func (p *initProcess) createNetworkInterfaces() error { 410 for _, config := range p.config.Config.Networks { 411 strategy, err := getStrategy(config.Type) 412 if err != nil { 413 return err 414 } 415 n := &network{ 416 Network: *config, 417 } 418 if err := strategy.create(n, p.pid()); err != nil { 419 return err 420 } 421 p.config.Networks = append(p.config.Networks, n) 422 } 423 return nil 424 } 425 426 func (p *initProcess) signal(sig os.Signal) error { 427 s, ok := sig.(syscall.Signal) 428 if !ok { 429 return errors.New("os: unsupported signal type") 430 } 431 return syscall.Kill(p.pid(), s) 432 } 433 434 func (p *initProcess) setExternalDescriptors(newFds []string) { 435 p.fds = newFds 436 } 437 438 func getPipeFds(pid int) ([]string, error) { 439 fds := make([]string, 3) 440 441 dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd") 442 for i := 0; i < 3; i++ { 443 f := filepath.Join(dirPath, strconv.Itoa(i)) 444 target, err := os.Readlink(f) 445 if err != nil { 446 return fds, err 447 } 448 fds[i] = target 449 } 450 return fds, nil 451 } 452 453 // InitializeIO creates pipes for use with the process's STDIO 454 // and returns the opposite side for each 455 func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) { 456 var fds []uintptr 457 i = &IO{} 458 // cleanup in case of an error 459 defer func() { 460 if err != nil { 461 for _, fd := range fds { 462 syscall.Close(int(fd)) 463 } 464 } 465 }() 466 // STDIN 467 r, w, err := os.Pipe() 468 if err != nil { 469 return nil, err 470 } 471 fds = append(fds, r.Fd(), w.Fd()) 472 p.Stdin, i.Stdin = r, w 473 // STDOUT 474 if r, w, err = os.Pipe(); err != nil { 475 return nil, err 476 } 477 fds = append(fds, r.Fd(), w.Fd()) 478 p.Stdout, i.Stdout = w, r 479 // STDERR 480 if r, w, err = os.Pipe(); err != nil { 481 return nil, err 482 } 483 fds = append(fds, r.Fd(), w.Fd()) 484 p.Stderr, i.Stderr = w, r 485 // change ownership of the pipes incase we are in a user namespace 486 for _, fd := range fds { 487 if err := syscall.Fchown(int(fd), rootuid, rootgid); err != nil { 488 return nil, err 489 } 490 } 491 return i, nil 492 }