github.com/zhuohuang-hust/src-cbuild@v0.0.0-20230105071821-c7aab3e7c840/mergeCode/runc/libcontainer/standard_init_linux.go (about) 1 // +build linux 2 3 package libcontainer 4 5 import ( 6 "fmt" 7 "io" 8 "os" 9 "os/exec" 10 "syscall" 11 12 "github.com/opencontainers/runc/libcontainer/apparmor" 13 "github.com/opencontainers/runc/libcontainer/configs" 14 "github.com/opencontainers/runc/libcontainer/keys" 15 "github.com/opencontainers/runc/libcontainer/label" 16 "github.com/opencontainers/runc/libcontainer/seccomp" 17 "github.com/opencontainers/runc/libcontainer/system" 18 ) 19 20 type linuxStandardInit struct { 21 pipe io.ReadWriteCloser 22 parentPid int 23 stateDirFD int 24 config *initConfig 25 } 26 27 func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) { 28 var newperms uint32 29 30 if l.config.Config.Namespaces.Contains(configs.NEWUSER) { 31 // with user ns we need 'other' search permissions 32 newperms = 0x8 33 } else { 34 // without user ns we need 'UID' search permissions 35 newperms = 0x80000 36 } 37 38 // create a unique per session container name that we can 39 // join in setns; however, other containers can also join it 40 return fmt.Sprintf("_ses.%s", l.config.ContainerId), 0xffffffff, newperms 41 } 42 43 // PR_SET_NO_NEW_PRIVS isn't exposed in Golang so we define it ourselves copying the value 44 // the kernel 45 const PR_SET_NO_NEW_PRIVS = 0x26 46 47 func (l *linuxStandardInit) Init() error { 48 if !l.config.Config.NoNewKeyring { 49 ringname, keepperms, newperms := l.getSessionRingParams() 50 51 // do not inherit the parent's session keyring 52 sessKeyId, err := keys.JoinSessionKeyring(ringname) 53 if err != nil { 54 return err 55 } 56 // make session keyring searcheable 57 if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil { 58 return err 59 } 60 } 61 62 var console *linuxConsole 63 if l.config.Console != "" { 64 console = newConsoleFromPath(l.config.Console) 65 if err := console.dupStdio(); err != nil { 66 return err 67 } 68 } 69 if console != nil { 70 if err := system.Setctty(); err != nil { 71 return err 72 } 73 } 74 if err := setupNetwork(l.config); err != nil { 75 return err 76 } 77 if err := setupRoute(l.config.Config); err != nil { 78 return err 79 } 80 81 label.Init() 82 // InitializeMountNamespace() can be executed only for a new mount namespace 83 if l.config.Config.Namespaces.Contains(configs.NEWNS) { 84 if err := setupRootfs(l.config.Config, console, l.pipe); err != nil { 85 return err 86 } 87 } 88 if hostname := l.config.Config.Hostname; hostname != "" { 89 if err := syscall.Sethostname([]byte(hostname)); err != nil { 90 return err 91 } 92 } 93 if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { 94 return err 95 } 96 if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil { 97 return err 98 } 99 100 for key, value := range l.config.Config.Sysctl { 101 if err := writeSystemProperty(key, value); err != nil { 102 return err 103 } 104 } 105 for _, path := range l.config.Config.ReadonlyPaths { 106 if err := remountReadonly(path); err != nil { 107 return err 108 } 109 } 110 for _, path := range l.config.Config.MaskPaths { 111 if err := maskPath(path); err != nil { 112 return err 113 } 114 } 115 pdeath, err := system.GetParentDeathSignal() 116 if err != nil { 117 return err 118 } 119 if l.config.NoNewPrivileges { 120 if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { 121 return err 122 } 123 } 124 // Tell our parent that we're ready to Execv. This must be done before the 125 // Seccomp rules have been applied, because we need to be able to read and 126 // write to a socket. 127 if err := syncParentReady(l.pipe); err != nil { 128 return err 129 } 130 // Without NoNewPrivileges seccomp is a privileged operation, so we need to 131 // do this before dropping capabilities; otherwise do it as late as possible 132 // just before execve so as few syscalls take place after it as possible. 133 if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges { 134 if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { 135 return err 136 } 137 } 138 if err := finalizeNamespace(l.config); err != nil { 139 return err 140 } 141 // finalizeNamespace can change user/group which clears the parent death 142 // signal, so we restore it here. 143 if err := pdeath.Restore(); err != nil { 144 return err 145 } 146 // compare the parent from the initial start of the init process and make sure that it did not change. 147 // if the parent changes that means it died and we were reparented to something else so we should 148 // just kill ourself and not cause problems for someone else. 149 if syscall.Getppid() != l.parentPid { 150 return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) 151 } 152 // check for the arg before waiting to make sure it exists and it is returned 153 // as a create time error. 154 name, err := exec.LookPath(l.config.Args[0]) 155 if err != nil { 156 return err 157 } 158 // close the pipe to signal that we have completed our init. 159 l.pipe.Close() 160 // wait for the fifo to be opened on the other side before 161 // exec'ing the users process. 162 fd, err := syscall.Openat(l.stateDirFD, execFifoFilename, os.O_WRONLY|syscall.O_CLOEXEC, 0) 163 if err != nil { 164 return newSystemErrorWithCause(err, "openat exec fifo") 165 } 166 if _, err := syscall.Write(fd, []byte("0")); err != nil { 167 return newSystemErrorWithCause(err, "write 0 exec fifo") 168 } 169 if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges { 170 if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { 171 return newSystemErrorWithCause(err, "init seccomp") 172 } 173 } 174 if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil { 175 return newSystemErrorWithCause(err, "exec user process") 176 } 177 return nil 178 }