github.com/zhuohuang-hust/src-cbuild@v0.0.0-20230105071821-c7aab3e7c840/mergeCode/runc/libcontainer/process_linux.go (about)

     1  // +build linux
     2  
     3  package libcontainer
     4  
     5  import (
     6  	"encoding/json"
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"os"
    11  	"os/exec"
    12  	"path/filepath"
    13  	"strconv"
    14  	"syscall"
    15  
    16  	"github.com/opencontainers/runc/libcontainer/cgroups"
    17  	"github.com/opencontainers/runc/libcontainer/configs"
    18  	"github.com/opencontainers/runc/libcontainer/system"
    19  	"github.com/opencontainers/runc/libcontainer/utils"
    20  )
    21  
    22  type parentProcess interface {
    23  	// pid returns the pid for the running process.
    24  	pid() int
    25  
    26  	// start starts the process execution.
    27  	start() error
    28  
    29  	// send a SIGKILL to the process and wait for the exit.
    30  	terminate() error
    31  
    32  	// wait waits on the process returning the process state.
    33  	wait() (*os.ProcessState, error)
    34  
    35  	// startTime returns the process start time.
    36  	startTime() (string, error)
    37  
    38  	signal(os.Signal) error
    39  
    40  	externalDescriptors() []string
    41  
    42  	setExternalDescriptors(fds []string)
    43  }
    44  
    45  type setnsProcess struct {
    46  	cmd           *exec.Cmd
    47  	parentPipe    *os.File
    48  	childPipe     *os.File
    49  	cgroupPaths   map[string]string
    50  	config        *initConfig
    51  	fds           []string
    52  	process       *Process
    53  	bootstrapData io.Reader
    54  	rootDir       *os.File
    55  }
    56  
    57  func (p *setnsProcess) startTime() (string, error) {
    58  	return system.GetProcessStartTime(p.pid())
    59  }
    60  
    61  func (p *setnsProcess) signal(sig os.Signal) error {
    62  	s, ok := sig.(syscall.Signal)
    63  	if !ok {
    64  		return errors.New("os: unsupported signal type")
    65  	}
    66  	return syscall.Kill(p.pid(), s)
    67  }
    68  
    69  func (p *setnsProcess) start() (err error) {
    70  	defer p.parentPipe.Close()
    71  	err = p.cmd.Start()
    72  	p.childPipe.Close()
    73  	p.rootDir.Close()
    74  	if err != nil {
    75  		return newSystemErrorWithCause(err, "starting setns process")
    76  	}
    77  	if p.bootstrapData != nil {
    78  		if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
    79  			return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
    80  		}
    81  	}
    82  	if err = p.execSetns(); err != nil {
    83  		return newSystemErrorWithCause(err, "executing setns process")
    84  	}
    85  	if len(p.cgroupPaths) > 0 {
    86  		if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
    87  			return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
    88  		}
    89  	}
    90  	// set oom_score_adj
    91  	if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
    92  		return newSystemErrorWithCause(err, "setting oom score")
    93  	}
    94  	// set rlimits, this has to be done here because we lose permissions
    95  	// to raise the limits once we enter a user-namespace
    96  	if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
    97  		return newSystemErrorWithCause(err, "setting rlimits for process")
    98  	}
    99  	if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
   100  		return newSystemErrorWithCause(err, "writing config to pipe")
   101  	}
   102  
   103  	if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
   104  		return newSystemErrorWithCause(err, "calling shutdown on init pipe")
   105  	}
   106  	// wait for the child process to fully complete and receive an error message
   107  	// if one was encoutered
   108  	var ierr *genericError
   109  	if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
   110  		return newSystemErrorWithCause(err, "decoding init error from pipe")
   111  	}
   112  	// Must be done after Shutdown so the child will exit and we can wait for it.
   113  	if ierr != nil {
   114  		p.wait()
   115  		return ierr
   116  	}
   117  	return nil
   118  }
   119  
   120  // execSetns runs the process that executes C code to perform the setns calls
   121  // because setns support requires the C process to fork off a child and perform the setns
   122  // before the go runtime boots, we wait on the process to die and receive the child's pid
   123  // over the provided pipe.
   124  func (p *setnsProcess) execSetns() error {
   125  	status, err := p.cmd.Process.Wait()
   126  	if err != nil {
   127  		p.cmd.Wait()
   128  		return newSystemErrorWithCause(err, "waiting on setns process to finish")
   129  	}
   130  	if !status.Success() {
   131  		p.cmd.Wait()
   132  		return newSystemError(&exec.ExitError{ProcessState: status})
   133  	}
   134  	var pid *pid
   135  	if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
   136  		p.cmd.Wait()
   137  		return newSystemErrorWithCause(err, "reading pid from init pipe")
   138  	}
   139  	process, err := os.FindProcess(pid.Pid)
   140  	if err != nil {
   141  		return err
   142  	}
   143  	p.cmd.Process = process
   144  	p.process.ops = p
   145  	return nil
   146  }
   147  
   148  // terminate sends a SIGKILL to the forked process for the setns routine then waits to
   149  // avoid the process becoming a zombie.
   150  func (p *setnsProcess) terminate() error {
   151  	if p.cmd.Process == nil {
   152  		return nil
   153  	}
   154  	err := p.cmd.Process.Kill()
   155  	if _, werr := p.wait(); err == nil {
   156  		err = werr
   157  	}
   158  	return err
   159  }
   160  
   161  func (p *setnsProcess) wait() (*os.ProcessState, error) {
   162  	err := p.cmd.Wait()
   163  
   164  	// Return actual ProcessState even on Wait error
   165  	return p.cmd.ProcessState, err
   166  }
   167  
   168  func (p *setnsProcess) pid() int {
   169  	return p.cmd.Process.Pid
   170  }
   171  
// externalDescriptors returns the descriptor names currently stored in fds.
// The slice is returned as-is, not copied.
func (p *setnsProcess) externalDescriptors() []string {
	return p.fds
}
   175  
// setExternalDescriptors replaces the stored descriptor names with newFds.
// The slice is stored as-is, not copied.
func (p *setnsProcess) setExternalDescriptors(newFds []string) {
	p.fds = newFds
}
   179  
   180  type initProcess struct {
   181  	cmd           *exec.Cmd
   182  	parentPipe    *os.File
   183  	childPipe     *os.File
   184  	config        *initConfig
   185  	manager       cgroups.Manager
   186  	container     *linuxContainer
   187  	fds           []string
   188  	process       *Process
   189  	bootstrapData io.Reader
   190  	sharePidns    bool
   191  	rootDir       *os.File
   192  }
   193  
   194  func (p *initProcess) pid() int {
   195  	return p.cmd.Process.Pid
   196  }
   197  
// externalDescriptors returns the descriptor names currently stored in fds.
// The slice is returned as-is, not copied.
func (p *initProcess) externalDescriptors() []string {
	return p.fds
}
   201  
   202  // execSetns runs the process that executes C code to perform the setns calls
   203  // because setns support requires the C process to fork off a child and perform the setns
   204  // before the go runtime boots, we wait on the process to die and receive the child's pid
   205  // over the provided pipe.
   206  // This is called by initProcess.start function
   207  func (p *initProcess) execSetns() error {
   208  	status, err := p.cmd.Process.Wait()
   209  	if err != nil {
   210  		p.cmd.Wait()
   211  		return err
   212  	}
   213  	if !status.Success() {
   214  		p.cmd.Wait()
   215  		return &exec.ExitError{ProcessState: status}
   216  	}
   217  	var pid *pid
   218  	if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
   219  		p.cmd.Wait()
   220  		return err
   221  	}
   222  	process, err := os.FindProcess(pid.Pid)
   223  	if err != nil {
   224  		return err
   225  	}
   226  	p.cmd.Process = process
   227  	p.process.ops = p
   228  	return nil
   229  }
   230  
   231  func (p *initProcess) start() error {
   232  	defer p.parentPipe.Close()
   233  	err := p.cmd.Start()
   234  	p.process.ops = p
   235  	p.childPipe.Close()
   236  	p.rootDir.Close()
   237  	if err != nil {
   238  		p.process.ops = nil
   239  		return newSystemErrorWithCause(err, "starting init process command")
   240  	}
   241  	if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
   242  		return err
   243  	}
   244  	if err := p.execSetns(); err != nil {
   245  		return newSystemErrorWithCause(err, "running exec setns process for init")
   246  	}
   247  	// Save the standard descriptor names before the container process
   248  	// can potentially move them (e.g., via dup2()).  If we don't do this now,
   249  	// we won't know at checkpoint time which file descriptor to look up.
   250  	fds, err := getPipeFds(p.pid())
   251  	if err != nil {
   252  		return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
   253  	}
   254  	p.setExternalDescriptors(fds)
   255  	// Do this before syncing with child so that no children
   256  	// can escape the cgroup
   257  	if err := p.manager.Apply(p.pid()); err != nil {
   258  		return newSystemErrorWithCause(err, "applying cgroup configuration for process")
   259  	}
   260  	defer func() {
   261  		if err != nil {
   262  			// TODO: should not be the responsibility to call here
   263  			p.manager.Destroy()
   264  		}
   265  	}()
   266  	if err := p.createNetworkInterfaces(); err != nil {
   267  		return newSystemErrorWithCause(err, "creating network interfaces")
   268  	}
   269  	if err := p.sendConfig(); err != nil {
   270  		return newSystemErrorWithCause(err, "sending config to init process")
   271  	}
   272  	var (
   273  		procSync   syncT
   274  		sentRun    bool
   275  		sentResume bool
   276  		ierr       *genericError
   277  	)
   278  
   279  	dec := json.NewDecoder(p.parentPipe)
   280  loop:
   281  	for {
   282  		if err := dec.Decode(&procSync); err != nil {
   283  			if err == io.EOF {
   284  				break loop
   285  			}
   286  			return newSystemErrorWithCause(err, "decoding sync type from init pipe")
   287  		}
   288  		switch procSync.Type {
   289  		case procReady:
   290  			if err := p.manager.Set(p.config.Config); err != nil {
   291  				return newSystemErrorWithCause(err, "setting cgroup config for ready process")
   292  			}
   293  			// set oom_score_adj
   294  			if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
   295  				return newSystemErrorWithCause(err, "setting oom score for ready process")
   296  			}
   297  			// set rlimits, this has to be done here because we lose permissions
   298  			// to raise the limits once we enter a user-namespace
   299  			if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
   300  				return newSystemErrorWithCause(err, "setting rlimits for ready process")
   301  			}
   302  			// call prestart hooks
   303  			if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
   304  				if p.config.Config.Hooks != nil {
   305  					s := configs.HookState{
   306  						Version: p.container.config.Version,
   307  						ID:      p.container.id,
   308  						Pid:     p.pid(),
   309  						Root:    p.config.Config.Rootfs,
   310  					}
   311  					for i, hook := range p.config.Config.Hooks.Prestart {
   312  						if err := hook.Run(s); err != nil {
   313  							return newSystemErrorWithCausef(err, "running prestart hook %d", i)
   314  						}
   315  					}
   316  				}
   317  			}
   318  			// Sync with child.
   319  			if err := utils.WriteJSON(p.parentPipe, syncT{procRun}); err != nil {
   320  				return newSystemErrorWithCause(err, "writing syncT run type")
   321  			}
   322  			sentRun = true
   323  		case procHooks:
   324  			if p.config.Config.Hooks != nil {
   325  				s := configs.HookState{
   326  					Version:    p.container.config.Version,
   327  					ID:         p.container.id,
   328  					Pid:        p.pid(),
   329  					Root:       p.config.Config.Rootfs,
   330  					BundlePath: utils.SearchLabels(p.config.Config.Labels, "bundle"),
   331  				}
   332  				for i, hook := range p.config.Config.Hooks.Prestart {
   333  					if err := hook.Run(s); err != nil {
   334  						return newSystemErrorWithCausef(err, "running prestart hook %d", i)
   335  					}
   336  				}
   337  			}
   338  			// Sync with child.
   339  			if err := utils.WriteJSON(p.parentPipe, syncT{procResume}); err != nil {
   340  				return newSystemErrorWithCause(err, "writing syncT resume type")
   341  			}
   342  			sentResume = true
   343  		case procError:
   344  			// wait for the child process to fully complete and receive an error message
   345  			// if one was encoutered
   346  			if err := dec.Decode(&ierr); err != nil && err != io.EOF {
   347  				return newSystemErrorWithCause(err, "decoding proc error from init")
   348  			}
   349  			if ierr != nil {
   350  				break loop
   351  			}
   352  			// Programmer error.
   353  			panic("No error following JSON procError payload.")
   354  		default:
   355  			return newSystemError(fmt.Errorf("invalid JSON payload from child"))
   356  		}
   357  	}
   358  	if !sentRun {
   359  		return newSystemErrorWithCause(ierr, "container init")
   360  	}
   361  	if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
   362  		return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process"))
   363  	}
   364  	if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
   365  		return newSystemErrorWithCause(err, "shutting down init pipe")
   366  	}
   367  	// Must be done after Shutdown so the child will exit and we can wait for it.
   368  	if ierr != nil {
   369  		p.wait()
   370  		return ierr
   371  	}
   372  	return nil
   373  }
   374  
   375  func (p *initProcess) wait() (*os.ProcessState, error) {
   376  	err := p.cmd.Wait()
   377  	if err != nil {
   378  		return p.cmd.ProcessState, err
   379  	}
   380  	// we should kill all processes in cgroup when init is died if we use host PID namespace
   381  	if p.sharePidns {
   382  		signalAllProcesses(p.manager, syscall.SIGKILL)
   383  	}
   384  	return p.cmd.ProcessState, nil
   385  }
   386  
   387  func (p *initProcess) terminate() error {
   388  	if p.cmd.Process == nil {
   389  		return nil
   390  	}
   391  	err := p.cmd.Process.Kill()
   392  	if _, werr := p.wait(); err == nil {
   393  		err = werr
   394  	}
   395  	return err
   396  }
   397  
   398  func (p *initProcess) startTime() (string, error) {
   399  	return system.GetProcessStartTime(p.pid())
   400  }
   401  
   402  func (p *initProcess) sendConfig() error {
   403  	// send the config to the container's init process, we don't use JSON Encode
   404  	// here because there might be a problem in JSON decoder in some cases, see:
   405  	// https://github.com/docker/docker/issues/14203#issuecomment-174177790
   406  	return utils.WriteJSON(p.parentPipe, p.config)
   407  }
   408  
   409  func (p *initProcess) createNetworkInterfaces() error {
   410  	for _, config := range p.config.Config.Networks {
   411  		strategy, err := getStrategy(config.Type)
   412  		if err != nil {
   413  			return err
   414  		}
   415  		n := &network{
   416  			Network: *config,
   417  		}
   418  		if err := strategy.create(n, p.pid()); err != nil {
   419  			return err
   420  		}
   421  		p.config.Networks = append(p.config.Networks, n)
   422  	}
   423  	return nil
   424  }
   425  
   426  func (p *initProcess) signal(sig os.Signal) error {
   427  	s, ok := sig.(syscall.Signal)
   428  	if !ok {
   429  		return errors.New("os: unsupported signal type")
   430  	}
   431  	return syscall.Kill(p.pid(), s)
   432  }
   433  
// setExternalDescriptors replaces the stored descriptor names with newFds.
// The slice is stored as-is, not copied.
func (p *initProcess) setExternalDescriptors(newFds []string) {
	p.fds = newFds
}
   437  
   438  func getPipeFds(pid int) ([]string, error) {
   439  	fds := make([]string, 3)
   440  
   441  	dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
   442  	for i := 0; i < 3; i++ {
   443  		f := filepath.Join(dirPath, strconv.Itoa(i))
   444  		target, err := os.Readlink(f)
   445  		if err != nil {
   446  			return fds, err
   447  		}
   448  		fds[i] = target
   449  	}
   450  	return fds, nil
   451  }
   452  
   453  // InitializeIO creates pipes for use with the process's STDIO
   454  // and returns the opposite side for each
   455  func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
   456  	var fds []uintptr
   457  	i = &IO{}
   458  	// cleanup in case of an error
   459  	defer func() {
   460  		if err != nil {
   461  			for _, fd := range fds {
   462  				syscall.Close(int(fd))
   463  			}
   464  		}
   465  	}()
   466  	// STDIN
   467  	r, w, err := os.Pipe()
   468  	if err != nil {
   469  		return nil, err
   470  	}
   471  	fds = append(fds, r.Fd(), w.Fd())
   472  	p.Stdin, i.Stdin = r, w
   473  	// STDOUT
   474  	if r, w, err = os.Pipe(); err != nil {
   475  		return nil, err
   476  	}
   477  	fds = append(fds, r.Fd(), w.Fd())
   478  	p.Stdout, i.Stdout = w, r
   479  	// STDERR
   480  	if r, w, err = os.Pipe(); err != nil {
   481  		return nil, err
   482  	}
   483  	fds = append(fds, r.Fd(), w.Fd())
   484  	p.Stderr, i.Stderr = w, r
   485  	// change ownership of the pipes incase we are in a user namespace
   486  	for _, fd := range fds {
   487  		if err := syscall.Fchown(int(fd), rootuid, rootgid); err != nil {
   488  			return nil, err
   489  		}
   490  	}
   491  	return i, nil
   492  }