github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/utils_linux.go (about) 1 package main 2 3 import ( 4 "errors" 5 "fmt" 6 "net" 7 "os" 8 "path/filepath" 9 "strconv" 10 11 "github.com/coreos/go-systemd/v22/activation" 12 "github.com/opencontainers/runtime-spec/specs-go" 13 selinux "github.com/opencontainers/selinux/go-selinux" 14 "github.com/sirupsen/logrus" 15 "github.com/urfave/cli" 16 "golang.org/x/sys/unix" 17 18 "github.com/opencontainers/runc/libcontainer" 19 "github.com/opencontainers/runc/libcontainer/configs" 20 "github.com/opencontainers/runc/libcontainer/specconv" 21 "github.com/opencontainers/runc/libcontainer/system/kernelversion" 22 "github.com/opencontainers/runc/libcontainer/utils" 23 ) 24 25 var errEmptyID = errors.New("container id cannot be empty") 26 27 // getContainer returns the specified container instance by loading it from 28 // a state directory (root). 29 func getContainer(context *cli.Context) (*libcontainer.Container, error) { 30 id := context.Args().First() 31 if id == "" { 32 return nil, errEmptyID 33 } 34 root := context.GlobalString("root") 35 return libcontainer.Load(root, id) 36 } 37 38 func getDefaultImagePath() string { 39 cwd, err := os.Getwd() 40 if err != nil { 41 panic(err) 42 } 43 return filepath.Join(cwd, "checkpoint") 44 } 45 46 // newProcess returns a new libcontainer Process with the arguments from the 47 // spec and stdio from the current process. 48 func newProcess(p specs.Process) (*libcontainer.Process, error) { 49 lp := &libcontainer.Process{ 50 Args: p.Args, 51 Env: p.Env, 52 // TODO: fix libcontainer's API to better support uid/gid in a typesafe way. 53 User: fmt.Sprintf("%d:%d", p.User.UID, p.User.GID), 54 Cwd: p.Cwd, 55 Label: p.SelinuxLabel, 56 NoNewPrivileges: &p.NoNewPrivileges, 57 AppArmorProfile: p.ApparmorProfile, 58 } 59 60 if p.ConsoleSize != nil { 61 lp.ConsoleWidth = uint16(p.ConsoleSize.Width) 62 lp.ConsoleHeight = uint16(p.ConsoleSize.Height) 63 } 64 65 if p.Scheduler != nil { 66 s := *p.Scheduler 67 lp.Scheduler = &s 68 } 69 70 if p.IOPriority != nil { 71 ioPriority := *p.IOPriority 72 lp.IOPriority = &ioPriority 73 } 74 75 if p.Capabilities != nil { 76 lp.Capabilities = &configs.Capabilities{} 77 lp.Capabilities.Bounding = p.Capabilities.Bounding 78 lp.Capabilities.Effective = p.Capabilities.Effective 79 lp.Capabilities.Inheritable = p.Capabilities.Inheritable 80 lp.Capabilities.Permitted = p.Capabilities.Permitted 81 lp.Capabilities.Ambient = p.Capabilities.Ambient 82 } 83 for _, gid := range p.User.AdditionalGids { 84 lp.AdditionalGroups = append(lp.AdditionalGroups, strconv.FormatUint(uint64(gid), 10)) 85 } 86 for _, rlimit := range p.Rlimits { 87 rl, err := createLibContainerRlimit(rlimit) 88 if err != nil { 89 return nil, err 90 } 91 lp.Rlimits = append(lp.Rlimits, rl) 92 } 93 return lp, nil 94 } 95 96 // setupIO modifies the given process config according to the options. 97 func setupIO(process *libcontainer.Process, rootuid, rootgid int, createTTY, detach bool, sockpath string) (*tty, error) { 98 if createTTY { 99 process.Stdin = nil 100 process.Stdout = nil 101 process.Stderr = nil 102 t := &tty{} 103 if !detach { 104 if err := t.initHostConsole(); err != nil { 105 return nil, err 106 } 107 parent, child, err := utils.NewSockPair("console") 108 if err != nil { 109 return nil, err 110 } 111 process.ConsoleSocket = child 112 t.postStart = append(t.postStart, parent, child) 113 t.consoleC = make(chan error, 1) 114 go func() { 115 t.consoleC <- t.recvtty(parent) 116 }() 117 } else { 118 // the caller of runc will handle receiving the console master 119 conn, err := net.Dial("unix", sockpath) 120 if err != nil { 121 return nil, err 122 } 123 uc, ok := conn.(*net.UnixConn) 124 if !ok { 125 return nil, errors.New("casting to UnixConn failed") 126 } 127 t.postStart = append(t.postStart, uc) 128 socket, err := uc.File() 129 if err != nil { 130 return nil, err 131 } 132 t.postStart = append(t.postStart, socket) 133 process.ConsoleSocket = socket 134 } 135 return t, nil 136 } 137 // when runc will detach the caller provides the stdio to runc via runc's 0,1,2 138 // and the container's process inherits runc's stdio. 139 if detach { 140 inheritStdio(process) 141 return &tty{}, nil 142 } 143 return setupProcessPipes(process, rootuid, rootgid) 144 } 145 146 // createPidFile creates a file with the processes pid inside it atomically 147 // it creates a temp file with the paths filename + '.' infront of it 148 // then renames the file 149 func createPidFile(path string, process *libcontainer.Process) error { 150 pid, err := process.Pid() 151 if err != nil { 152 return err 153 } 154 var ( 155 tmpDir = filepath.Dir(path) 156 tmpName = filepath.Join(tmpDir, "."+filepath.Base(path)) 157 ) 158 f, err := os.OpenFile(tmpName, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, 0o666) 159 if err != nil { 160 return err 161 } 162 _, err = f.WriteString(strconv.Itoa(pid)) 163 f.Close() 164 if err != nil { 165 return err 166 } 167 return os.Rename(tmpName, path) 168 } 169 170 func createContainer(context *cli.Context, id string, spec *specs.Spec) (*libcontainer.Container, error) { 171 rootlessCg, err := shouldUseRootlessCgroupManager(context) 172 if err != nil { 173 return nil, err 174 } 175 config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{ 176 CgroupName: id, 177 UseSystemdCgroup: context.GlobalBool("systemd-cgroup"), 178 NoPivotRoot: context.Bool("no-pivot"), 179 NoNewKeyring: context.Bool("no-new-keyring"), 180 Spec: spec, 181 RootlessEUID: os.Geteuid() != 0, 182 RootlessCgroups: rootlessCg, 183 }) 184 if err != nil { 185 return nil, err 186 } 187 188 root := context.GlobalString("root") 189 return libcontainer.Create(root, id, config) 190 } 191 192 type runner struct { 193 init bool 194 enableSubreaper bool 195 shouldDestroy bool 196 detach bool 197 listenFDs []*os.File 198 preserveFDs int 199 pidFile string 200 consoleSocket string 201 pidfdSocket string 202 container *libcontainer.Container 203 action CtAct 204 notifySocket *notifySocket 205 criuOpts *libcontainer.CriuOpts 206 subCgroupPaths map[string]string 207 } 208 209 func (r *runner) run(config *specs.Process) (int, error) { 210 var err error 211 defer func() { 212 if err != nil { 213 r.destroy() 214 } 215 }() 216 if err = r.checkTerminal(config); err != nil { 217 return -1, err 218 } 219 process, err := newProcess(*config) 220 if err != nil { 221 return -1, err 222 } 223 process.LogLevel = strconv.Itoa(int(logrus.GetLevel())) 224 // Populate the fields that come from runner. 225 process.Init = r.init 226 process.SubCgroupPaths = r.subCgroupPaths 227 if len(r.listenFDs) > 0 { 228 process.Env = append(process.Env, "LISTEN_FDS="+strconv.Itoa(len(r.listenFDs)), "LISTEN_PID=1") 229 process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...) 230 } 231 baseFd := 3 + len(process.ExtraFiles) 232 procSelfFd, closer := utils.ProcThreadSelf("fd/") 233 defer closer() 234 for i := baseFd; i < baseFd+r.preserveFDs; i++ { 235 _, err = os.Stat(filepath.Join(procSelfFd, strconv.Itoa(i))) 236 if err != nil { 237 return -1, fmt.Errorf("unable to stat preserved-fd %d (of %d): %w", i-baseFd, r.preserveFDs, err) 238 } 239 process.ExtraFiles = append(process.ExtraFiles, os.NewFile(uintptr(i), "PreserveFD:"+strconv.Itoa(i))) 240 } 241 rootuid, err := r.container.Config().HostRootUID() 242 if err != nil { 243 return -1, err 244 } 245 rootgid, err := r.container.Config().HostRootGID() 246 if err != nil { 247 return -1, err 248 } 249 detach := r.detach || (r.action == CT_ACT_CREATE) 250 // Setting up IO is a two stage process. We need to modify process to deal 251 // with detaching containers, and then we get a tty after the container has 252 // started. 253 handler := newSignalHandler(r.enableSubreaper, r.notifySocket) 254 tty, err := setupIO(process, rootuid, rootgid, config.Terminal, detach, r.consoleSocket) 255 if err != nil { 256 return -1, err 257 } 258 defer tty.Close() 259 260 if r.pidfdSocket != "" { 261 connClose, err := setupPidfdSocket(process, r.pidfdSocket) 262 if err != nil { 263 return -1, err 264 } 265 defer connClose() 266 } 267 268 switch r.action { 269 case CT_ACT_CREATE: 270 err = r.container.Start(process) 271 case CT_ACT_RESTORE: 272 err = r.container.Restore(process, r.criuOpts) 273 case CT_ACT_RUN: 274 err = r.container.Run(process) 275 default: 276 panic("Unknown action") 277 } 278 if err != nil { 279 return -1, err 280 } 281 if err = tty.waitConsole(); err != nil { 282 r.terminate(process) 283 return -1, err 284 } 285 tty.ClosePostStart() 286 if r.pidFile != "" { 287 if err = createPidFile(r.pidFile, process); err != nil { 288 r.terminate(process) 289 return -1, err 290 } 291 } 292 status, err := handler.forward(process, tty, detach) 293 if err != nil { 294 r.terminate(process) 295 } 296 if detach { 297 return 0, nil 298 } 299 if err == nil { 300 r.destroy() 301 } 302 return status, err 303 } 304 305 func (r *runner) destroy() { 306 if r.shouldDestroy { 307 if err := r.container.Destroy(); err != nil { 308 logrus.Warn(err) 309 } 310 } 311 } 312 313 func (r *runner) terminate(p *libcontainer.Process) { 314 _ = p.Signal(unix.SIGKILL) 315 _, _ = p.Wait() 316 } 317 318 func (r *runner) checkTerminal(config *specs.Process) error { 319 detach := r.detach || (r.action == CT_ACT_CREATE) 320 // Check command-line for sanity. 321 if detach && config.Terminal && r.consoleSocket == "" { 322 return errors.New("cannot allocate tty if runc will detach without setting console socket") 323 } 324 if (!detach || !config.Terminal) && r.consoleSocket != "" { 325 return errors.New("cannot use console socket if runc will not detach or allocate tty") 326 } 327 return nil 328 } 329 330 func validateProcessSpec(spec *specs.Process) error { 331 if spec == nil { 332 return errors.New("process property must not be empty") 333 } 334 if spec.Cwd == "" { 335 return errors.New("Cwd property must not be empty") 336 } 337 if !filepath.IsAbs(spec.Cwd) { 338 return errors.New("Cwd must be an absolute path") 339 } 340 if len(spec.Args) == 0 { 341 return errors.New("args must not be empty") 342 } 343 if spec.SelinuxLabel != "" && !selinux.GetEnabled() { 344 return errors.New("selinux label is specified in config, but selinux is disabled or not supported") 345 } 346 return nil 347 } 348 349 type CtAct uint8 350 351 const ( 352 CT_ACT_CREATE CtAct = iota + 1 353 CT_ACT_RUN 354 CT_ACT_RESTORE 355 ) 356 357 func startContainer(context *cli.Context, action CtAct, criuOpts *libcontainer.CriuOpts) (int, error) { 358 if err := revisePidFile(context); err != nil { 359 return -1, err 360 } 361 spec, err := setupSpec(context) 362 if err != nil { 363 return -1, err 364 } 365 366 id := context.Args().First() 367 if id == "" { 368 return -1, errEmptyID 369 } 370 371 notifySocket := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id) 372 if notifySocket != nil { 373 notifySocket.setupSpec(spec) 374 } 375 376 container, err := createContainer(context, id, spec) 377 if err != nil { 378 return -1, err 379 } 380 381 if notifySocket != nil { 382 if err := notifySocket.setupSocketDirectory(); err != nil { 383 return -1, err 384 } 385 if action == CT_ACT_RUN { 386 if err := notifySocket.bindSocket(); err != nil { 387 return -1, err 388 } 389 } 390 } 391 392 // Support on-demand socket activation by passing file descriptors into the container init process. 393 listenFDs := []*os.File{} 394 if os.Getenv("LISTEN_FDS") != "" { 395 listenFDs = activation.Files(false) 396 } 397 398 r := &runner{ 399 enableSubreaper: !context.Bool("no-subreaper"), 400 shouldDestroy: !context.Bool("keep"), 401 container: container, 402 listenFDs: listenFDs, 403 notifySocket: notifySocket, 404 consoleSocket: context.String("console-socket"), 405 pidfdSocket: context.String("pidfd-socket"), 406 detach: context.Bool("detach"), 407 pidFile: context.String("pid-file"), 408 preserveFDs: context.Int("preserve-fds"), 409 action: action, 410 criuOpts: criuOpts, 411 init: true, 412 } 413 return r.run(spec.Process) 414 } 415 416 func setupPidfdSocket(process *libcontainer.Process, sockpath string) (_clean func(), _ error) { 417 linux530 := kernelversion.KernelVersion{Kernel: 5, Major: 3} 418 ok, err := kernelversion.GreaterEqualThan(linux530) 419 if err != nil { 420 return nil, err 421 } 422 if !ok { 423 return nil, fmt.Errorf("--pidfd-socket requires >= v5.3 kernel") 424 } 425 426 conn, err := net.Dial("unix", sockpath) 427 if err != nil { 428 return nil, fmt.Errorf("failed to dail %s: %w", sockpath, err) 429 } 430 431 uc, ok := conn.(*net.UnixConn) 432 if !ok { 433 conn.Close() 434 return nil, errors.New("failed to cast to UnixConn") 435 } 436 437 socket, err := uc.File() 438 if err != nil { 439 conn.Close() 440 return nil, fmt.Errorf("failed to dup socket: %w", err) 441 } 442 443 process.PidfdSocket = socket 444 return func() { 445 conn.Close() 446 }, nil 447 }