github.com/zhuohuang-hust/src-cbuild@v0.0.0-20230105071821-c7aab3e7c840/mergeCode/runc/libcontainer/standard_init_linux.go (about)

     1  // +build linux
     2  
     3  package libcontainer
     4  
     5  import (
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"os/exec"
    10  	"syscall"
    11  
    12  	"github.com/opencontainers/runc/libcontainer/apparmor"
    13  	"github.com/opencontainers/runc/libcontainer/configs"
    14  	"github.com/opencontainers/runc/libcontainer/keys"
    15  	"github.com/opencontainers/runc/libcontainer/label"
    16  	"github.com/opencontainers/runc/libcontainer/seccomp"
    17  	"github.com/opencontainers/runc/libcontainer/system"
    18  )
    19  
    20  type linuxStandardInit struct {
    21  	pipe       io.ReadWriteCloser
    22  	parentPid  int
    23  	stateDirFD int
    24  	config     *initConfig
    25  }
    26  
    27  func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
    28  	var newperms uint32
    29  
    30  	if l.config.Config.Namespaces.Contains(configs.NEWUSER) {
    31  		// with user ns we need 'other' search permissions
    32  		newperms = 0x8
    33  	} else {
    34  		// without user ns we need 'UID' search permissions
    35  		newperms = 0x80000
    36  	}
    37  
    38  	// create a unique per session container name that we can
    39  	// join in setns; however, other containers can also join it
    40  	return fmt.Sprintf("_ses.%s", l.config.ContainerId), 0xffffffff, newperms
    41  }
    42  
// PR_SET_NO_NEW_PRIVS is the prctl option Init passes to system.Prctl when
// NoNewPrivileges is requested. It isn't exposed in Golang, so we define it
// ourselves, copying the value from the kernel.
const PR_SET_NO_NEW_PRIVS = 0x26
    46  
    47  func (l *linuxStandardInit) Init() error {
    48  	if !l.config.Config.NoNewKeyring {
    49  		ringname, keepperms, newperms := l.getSessionRingParams()
    50  
    51  		// do not inherit the parent's session keyring
    52  		sessKeyId, err := keys.JoinSessionKeyring(ringname)
    53  		if err != nil {
    54  			return err
    55  		}
    56  		// make session keyring searcheable
    57  		if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
    58  			return err
    59  		}
    60  	}
    61  
    62  	var console *linuxConsole
    63  	if l.config.Console != "" {
    64  		console = newConsoleFromPath(l.config.Console)
    65  		if err := console.dupStdio(); err != nil {
    66  			return err
    67  		}
    68  	}
    69  	if console != nil {
    70  		if err := system.Setctty(); err != nil {
    71  			return err
    72  		}
    73  	}
    74  	if err := setupNetwork(l.config); err != nil {
    75  		return err
    76  	}
    77  	if err := setupRoute(l.config.Config); err != nil {
    78  		return err
    79  	}
    80  
    81  	label.Init()
    82  	// InitializeMountNamespace() can be executed only for a new mount namespace
    83  	if l.config.Config.Namespaces.Contains(configs.NEWNS) {
    84  		if err := setupRootfs(l.config.Config, console, l.pipe); err != nil {
    85  			return err
    86  		}
    87  	}
    88  	if hostname := l.config.Config.Hostname; hostname != "" {
    89  		if err := syscall.Sethostname([]byte(hostname)); err != nil {
    90  			return err
    91  		}
    92  	}
    93  	if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
    94  		return err
    95  	}
    96  	if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
    97  		return err
    98  	}
    99  
   100  	for key, value := range l.config.Config.Sysctl {
   101  		if err := writeSystemProperty(key, value); err != nil {
   102  			return err
   103  		}
   104  	}
   105  	for _, path := range l.config.Config.ReadonlyPaths {
   106  		if err := remountReadonly(path); err != nil {
   107  			return err
   108  		}
   109  	}
   110  	for _, path := range l.config.Config.MaskPaths {
   111  		if err := maskPath(path); err != nil {
   112  			return err
   113  		}
   114  	}
   115  	pdeath, err := system.GetParentDeathSignal()
   116  	if err != nil {
   117  		return err
   118  	}
   119  	if l.config.NoNewPrivileges {
   120  		if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
   121  			return err
   122  		}
   123  	}
   124  	// Tell our parent that we're ready to Execv. This must be done before the
   125  	// Seccomp rules have been applied, because we need to be able to read and
   126  	// write to a socket.
   127  	if err := syncParentReady(l.pipe); err != nil {
   128  		return err
   129  	}
   130  	// Without NoNewPrivileges seccomp is a privileged operation, so we need to
   131  	// do this before dropping capabilities; otherwise do it as late as possible
   132  	// just before execve so as few syscalls take place after it as possible.
   133  	if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
   134  		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
   135  			return err
   136  		}
   137  	}
   138  	if err := finalizeNamespace(l.config); err != nil {
   139  		return err
   140  	}
   141  	// finalizeNamespace can change user/group which clears the parent death
   142  	// signal, so we restore it here.
   143  	if err := pdeath.Restore(); err != nil {
   144  		return err
   145  	}
   146  	// compare the parent from the initial start of the init process and make sure that it did not change.
   147  	// if the parent changes that means it died and we were reparented to something else so we should
   148  	// just kill ourself and not cause problems for someone else.
   149  	if syscall.Getppid() != l.parentPid {
   150  		return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
   151  	}
   152  	// check for the arg before waiting to make sure it exists and it is returned
   153  	// as a create time error.
   154  	name, err := exec.LookPath(l.config.Args[0])
   155  	if err != nil {
   156  		return err
   157  	}
   158  	// close the pipe to signal that we have completed our init.
   159  	l.pipe.Close()
   160  	// wait for the fifo to be opened on the other side before
   161  	// exec'ing the users process.
   162  	fd, err := syscall.Openat(l.stateDirFD, execFifoFilename, os.O_WRONLY|syscall.O_CLOEXEC, 0)
   163  	if err != nil {
   164  		return newSystemErrorWithCause(err, "openat exec fifo")
   165  	}
   166  	if _, err := syscall.Write(fd, []byte("0")); err != nil {
   167  		return newSystemErrorWithCause(err, "write 0 exec fifo")
   168  	}
   169  	if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
   170  		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
   171  			return newSystemErrorWithCause(err, "init seccomp")
   172  		}
   173  	}
   174  	if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
   175  		return newSystemErrorWithCause(err, "exec user process")
   176  	}
   177  	return nil
   178  }