github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/utils_linux.go (about)

     1  package main
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"net"
     7  	"os"
     8  	"path/filepath"
     9  	"strconv"
    10  
    11  	"github.com/coreos/go-systemd/v22/activation"
    12  	"github.com/opencontainers/runtime-spec/specs-go"
    13  	selinux "github.com/opencontainers/selinux/go-selinux"
    14  	"github.com/sirupsen/logrus"
    15  	"github.com/urfave/cli"
    16  	"golang.org/x/sys/unix"
    17  
    18  	"github.com/opencontainers/runc/libcontainer"
    19  	"github.com/opencontainers/runc/libcontainer/configs"
    20  	"github.com/opencontainers/runc/libcontainer/specconv"
    21  	"github.com/opencontainers/runc/libcontainer/system/kernelversion"
    22  	"github.com/opencontainers/runc/libcontainer/utils"
    23  )
    24  
// errEmptyID is returned when no container id was supplied as the first
// command-line argument.
var errEmptyID = errors.New("container id cannot be empty")
    26  
    27  // getContainer returns the specified container instance by loading it from
    28  // a state directory (root).
    29  func getContainer(context *cli.Context) (*libcontainer.Container, error) {
    30  	id := context.Args().First()
    31  	if id == "" {
    32  		return nil, errEmptyID
    33  	}
    34  	root := context.GlobalString("root")
    35  	return libcontainer.Load(root, id)
    36  }
    37  
    38  func getDefaultImagePath() string {
    39  	cwd, err := os.Getwd()
    40  	if err != nil {
    41  		panic(err)
    42  	}
    43  	return filepath.Join(cwd, "checkpoint")
    44  }
    45  
    46  // newProcess returns a new libcontainer Process with the arguments from the
    47  // spec and stdio from the current process.
    48  func newProcess(p specs.Process) (*libcontainer.Process, error) {
    49  	lp := &libcontainer.Process{
    50  		Args: p.Args,
    51  		Env:  p.Env,
    52  		// TODO: fix libcontainer's API to better support uid/gid in a typesafe way.
    53  		User:            fmt.Sprintf("%d:%d", p.User.UID, p.User.GID),
    54  		Cwd:             p.Cwd,
    55  		Label:           p.SelinuxLabel,
    56  		NoNewPrivileges: &p.NoNewPrivileges,
    57  		AppArmorProfile: p.ApparmorProfile,
    58  	}
    59  
    60  	if p.ConsoleSize != nil {
    61  		lp.ConsoleWidth = uint16(p.ConsoleSize.Width)
    62  		lp.ConsoleHeight = uint16(p.ConsoleSize.Height)
    63  	}
    64  
    65  	if p.Scheduler != nil {
    66  		s := *p.Scheduler
    67  		lp.Scheduler = &s
    68  	}
    69  
    70  	if p.IOPriority != nil {
    71  		ioPriority := *p.IOPriority
    72  		lp.IOPriority = &ioPriority
    73  	}
    74  
    75  	if p.Capabilities != nil {
    76  		lp.Capabilities = &configs.Capabilities{}
    77  		lp.Capabilities.Bounding = p.Capabilities.Bounding
    78  		lp.Capabilities.Effective = p.Capabilities.Effective
    79  		lp.Capabilities.Inheritable = p.Capabilities.Inheritable
    80  		lp.Capabilities.Permitted = p.Capabilities.Permitted
    81  		lp.Capabilities.Ambient = p.Capabilities.Ambient
    82  	}
    83  	for _, gid := range p.User.AdditionalGids {
    84  		lp.AdditionalGroups = append(lp.AdditionalGroups, strconv.FormatUint(uint64(gid), 10))
    85  	}
    86  	for _, rlimit := range p.Rlimits {
    87  		rl, err := createLibContainerRlimit(rlimit)
    88  		if err != nil {
    89  			return nil, err
    90  		}
    91  		lp.Rlimits = append(lp.Rlimits, rl)
    92  	}
    93  	return lp, nil
    94  }
    95  
    96  // setupIO modifies the given process config according to the options.
    97  func setupIO(process *libcontainer.Process, rootuid, rootgid int, createTTY, detach bool, sockpath string) (*tty, error) {
    98  	if createTTY {
    99  		process.Stdin = nil
   100  		process.Stdout = nil
   101  		process.Stderr = nil
   102  		t := &tty{}
   103  		if !detach {
   104  			if err := t.initHostConsole(); err != nil {
   105  				return nil, err
   106  			}
   107  			parent, child, err := utils.NewSockPair("console")
   108  			if err != nil {
   109  				return nil, err
   110  			}
   111  			process.ConsoleSocket = child
   112  			t.postStart = append(t.postStart, parent, child)
   113  			t.consoleC = make(chan error, 1)
   114  			go func() {
   115  				t.consoleC <- t.recvtty(parent)
   116  			}()
   117  		} else {
   118  			// the caller of runc will handle receiving the console master
   119  			conn, err := net.Dial("unix", sockpath)
   120  			if err != nil {
   121  				return nil, err
   122  			}
   123  			uc, ok := conn.(*net.UnixConn)
   124  			if !ok {
   125  				return nil, errors.New("casting to UnixConn failed")
   126  			}
   127  			t.postStart = append(t.postStart, uc)
   128  			socket, err := uc.File()
   129  			if err != nil {
   130  				return nil, err
   131  			}
   132  			t.postStart = append(t.postStart, socket)
   133  			process.ConsoleSocket = socket
   134  		}
   135  		return t, nil
   136  	}
   137  	// when runc will detach the caller provides the stdio to runc via runc's 0,1,2
   138  	// and the container's process inherits runc's stdio.
   139  	if detach {
   140  		inheritStdio(process)
   141  		return &tty{}, nil
   142  	}
   143  	return setupProcessPipes(process, rootuid, rootgid)
   144  }
   145  
   146  // createPidFile creates a file with the processes pid inside it atomically
   147  // it creates a temp file with the paths filename + '.' infront of it
   148  // then renames the file
   149  func createPidFile(path string, process *libcontainer.Process) error {
   150  	pid, err := process.Pid()
   151  	if err != nil {
   152  		return err
   153  	}
   154  	var (
   155  		tmpDir  = filepath.Dir(path)
   156  		tmpName = filepath.Join(tmpDir, "."+filepath.Base(path))
   157  	)
   158  	f, err := os.OpenFile(tmpName, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, 0o666)
   159  	if err != nil {
   160  		return err
   161  	}
   162  	_, err = f.WriteString(strconv.Itoa(pid))
   163  	f.Close()
   164  	if err != nil {
   165  		return err
   166  	}
   167  	return os.Rename(tmpName, path)
   168  }
   169  
   170  func createContainer(context *cli.Context, id string, spec *specs.Spec) (*libcontainer.Container, error) {
   171  	rootlessCg, err := shouldUseRootlessCgroupManager(context)
   172  	if err != nil {
   173  		return nil, err
   174  	}
   175  	config, err := specconv.CreateLibcontainerConfig(&specconv.CreateOpts{
   176  		CgroupName:       id,
   177  		UseSystemdCgroup: context.GlobalBool("systemd-cgroup"),
   178  		NoPivotRoot:      context.Bool("no-pivot"),
   179  		NoNewKeyring:     context.Bool("no-new-keyring"),
   180  		Spec:             spec,
   181  		RootlessEUID:     os.Geteuid() != 0,
   182  		RootlessCgroups:  rootlessCg,
   183  	})
   184  	if err != nil {
   185  		return nil, err
   186  	}
   187  
   188  	root := context.GlobalString("root")
   189  	return libcontainer.Create(root, id, config)
   190  }
   191  
// runner bundles the container and the options needed to start a process in
// it; see (*runner).run.
type runner struct {
	// init is copied to the process's Init field.
	init            bool
	// enableSubreaper is passed to newSignalHandler.
	enableSubreaper bool
	// shouldDestroy makes destroy() actually destroy the container.
	shouldDestroy   bool
	// detach requests detached operation; CT_ACT_CREATE implies it as well.
	detach          bool
	// listenFDs are appended to the process's ExtraFiles and announced via
	// the LISTEN_FDS/LISTEN_PID environment variables.
	listenFDs       []*os.File
	// preserveFDs is the number of additional fds, following the ones above,
	// that are handed to the process.
	preserveFDs     int
	// pidFile, when non-empty, is the path the process's pid is written to.
	pidFile         string
	// consoleSocket is the socket path handed to setupIO.
	consoleSocket   string
	// pidfdSocket, when non-empty, is the socket path handed to setupPidfdSocket.
	pidfdSocket     string
	// container is the container the process is started in.
	container       *libcontainer.Container
	// action selects between Start, Run and Restore.
	action          CtAct
	// notifySocket is passed to newSignalHandler; it may be nil.
	notifySocket    *notifySocket
	// criuOpts is passed to Restore when action is CT_ACT_RESTORE.
	criuOpts        *libcontainer.CriuOpts
	// subCgroupPaths is copied to the process's SubCgroupPaths field.
	subCgroupPaths  map[string]string
}
   208  
   209  func (r *runner) run(config *specs.Process) (int, error) {
   210  	var err error
   211  	defer func() {
   212  		if err != nil {
   213  			r.destroy()
   214  		}
   215  	}()
   216  	if err = r.checkTerminal(config); err != nil {
   217  		return -1, err
   218  	}
   219  	process, err := newProcess(*config)
   220  	if err != nil {
   221  		return -1, err
   222  	}
   223  	process.LogLevel = strconv.Itoa(int(logrus.GetLevel()))
   224  	// Populate the fields that come from runner.
   225  	process.Init = r.init
   226  	process.SubCgroupPaths = r.subCgroupPaths
   227  	if len(r.listenFDs) > 0 {
   228  		process.Env = append(process.Env, "LISTEN_FDS="+strconv.Itoa(len(r.listenFDs)), "LISTEN_PID=1")
   229  		process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...)
   230  	}
   231  	baseFd := 3 + len(process.ExtraFiles)
   232  	procSelfFd, closer := utils.ProcThreadSelf("fd/")
   233  	defer closer()
   234  	for i := baseFd; i < baseFd+r.preserveFDs; i++ {
   235  		_, err = os.Stat(filepath.Join(procSelfFd, strconv.Itoa(i)))
   236  		if err != nil {
   237  			return -1, fmt.Errorf("unable to stat preserved-fd %d (of %d): %w", i-baseFd, r.preserveFDs, err)
   238  		}
   239  		process.ExtraFiles = append(process.ExtraFiles, os.NewFile(uintptr(i), "PreserveFD:"+strconv.Itoa(i)))
   240  	}
   241  	rootuid, err := r.container.Config().HostRootUID()
   242  	if err != nil {
   243  		return -1, err
   244  	}
   245  	rootgid, err := r.container.Config().HostRootGID()
   246  	if err != nil {
   247  		return -1, err
   248  	}
   249  	detach := r.detach || (r.action == CT_ACT_CREATE)
   250  	// Setting up IO is a two stage process. We need to modify process to deal
   251  	// with detaching containers, and then we get a tty after the container has
   252  	// started.
   253  	handler := newSignalHandler(r.enableSubreaper, r.notifySocket)
   254  	tty, err := setupIO(process, rootuid, rootgid, config.Terminal, detach, r.consoleSocket)
   255  	if err != nil {
   256  		return -1, err
   257  	}
   258  	defer tty.Close()
   259  
   260  	if r.pidfdSocket != "" {
   261  		connClose, err := setupPidfdSocket(process, r.pidfdSocket)
   262  		if err != nil {
   263  			return -1, err
   264  		}
   265  		defer connClose()
   266  	}
   267  
   268  	switch r.action {
   269  	case CT_ACT_CREATE:
   270  		err = r.container.Start(process)
   271  	case CT_ACT_RESTORE:
   272  		err = r.container.Restore(process, r.criuOpts)
   273  	case CT_ACT_RUN:
   274  		err = r.container.Run(process)
   275  	default:
   276  		panic("Unknown action")
   277  	}
   278  	if err != nil {
   279  		return -1, err
   280  	}
   281  	if err = tty.waitConsole(); err != nil {
   282  		r.terminate(process)
   283  		return -1, err
   284  	}
   285  	tty.ClosePostStart()
   286  	if r.pidFile != "" {
   287  		if err = createPidFile(r.pidFile, process); err != nil {
   288  			r.terminate(process)
   289  			return -1, err
   290  		}
   291  	}
   292  	status, err := handler.forward(process, tty, detach)
   293  	if err != nil {
   294  		r.terminate(process)
   295  	}
   296  	if detach {
   297  		return 0, nil
   298  	}
   299  	if err == nil {
   300  		r.destroy()
   301  	}
   302  	return status, err
   303  }
   304  
   305  func (r *runner) destroy() {
   306  	if r.shouldDestroy {
   307  		if err := r.container.Destroy(); err != nil {
   308  			logrus.Warn(err)
   309  		}
   310  	}
   311  }
   312  
// terminate sends SIGKILL to p and then waits for it. The results of both
// calls are explicitly discarded.
func (r *runner) terminate(p *libcontainer.Process) {
	_ = p.Signal(unix.SIGKILL)
	_, _ = p.Wait()
}
   317  
   318  func (r *runner) checkTerminal(config *specs.Process) error {
   319  	detach := r.detach || (r.action == CT_ACT_CREATE)
   320  	// Check command-line for sanity.
   321  	if detach && config.Terminal && r.consoleSocket == "" {
   322  		return errors.New("cannot allocate tty if runc will detach without setting console socket")
   323  	}
   324  	if (!detach || !config.Terminal) && r.consoleSocket != "" {
   325  		return errors.New("cannot use console socket if runc will not detach or allocate tty")
   326  	}
   327  	return nil
   328  }
   329  
   330  func validateProcessSpec(spec *specs.Process) error {
   331  	if spec == nil {
   332  		return errors.New("process property must not be empty")
   333  	}
   334  	if spec.Cwd == "" {
   335  		return errors.New("Cwd property must not be empty")
   336  	}
   337  	if !filepath.IsAbs(spec.Cwd) {
   338  		return errors.New("Cwd must be an absolute path")
   339  	}
   340  	if len(spec.Args) == 0 {
   341  		return errors.New("args must not be empty")
   342  	}
   343  	if spec.SelinuxLabel != "" && !selinux.GetEnabled() {
   344  		return errors.New("selinux label is specified in config, but selinux is disabled or not supported")
   345  	}
   346  	return nil
   347  }
   348  
// CtAct identifies the container operation a runner performs. Valid values
// start at 1; runner.run panics on any value not listed below.
type CtAct uint8

const (
	// CT_ACT_CREATE makes the runner call Start on the container; it also
	// implies detached operation.
	CT_ACT_CREATE CtAct = iota + 1
	// CT_ACT_RUN makes the runner call Run on the container.
	CT_ACT_RUN
	// CT_ACT_RESTORE makes the runner call Restore on the container with the
	// runner's CRIU options.
	CT_ACT_RESTORE
)
   356  
   357  func startContainer(context *cli.Context, action CtAct, criuOpts *libcontainer.CriuOpts) (int, error) {
   358  	if err := revisePidFile(context); err != nil {
   359  		return -1, err
   360  	}
   361  	spec, err := setupSpec(context)
   362  	if err != nil {
   363  		return -1, err
   364  	}
   365  
   366  	id := context.Args().First()
   367  	if id == "" {
   368  		return -1, errEmptyID
   369  	}
   370  
   371  	notifySocket := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id)
   372  	if notifySocket != nil {
   373  		notifySocket.setupSpec(spec)
   374  	}
   375  
   376  	container, err := createContainer(context, id, spec)
   377  	if err != nil {
   378  		return -1, err
   379  	}
   380  
   381  	if notifySocket != nil {
   382  		if err := notifySocket.setupSocketDirectory(); err != nil {
   383  			return -1, err
   384  		}
   385  		if action == CT_ACT_RUN {
   386  			if err := notifySocket.bindSocket(); err != nil {
   387  				return -1, err
   388  			}
   389  		}
   390  	}
   391  
   392  	// Support on-demand socket activation by passing file descriptors into the container init process.
   393  	listenFDs := []*os.File{}
   394  	if os.Getenv("LISTEN_FDS") != "" {
   395  		listenFDs = activation.Files(false)
   396  	}
   397  
   398  	r := &runner{
   399  		enableSubreaper: !context.Bool("no-subreaper"),
   400  		shouldDestroy:   !context.Bool("keep"),
   401  		container:       container,
   402  		listenFDs:       listenFDs,
   403  		notifySocket:    notifySocket,
   404  		consoleSocket:   context.String("console-socket"),
   405  		pidfdSocket:     context.String("pidfd-socket"),
   406  		detach:          context.Bool("detach"),
   407  		pidFile:         context.String("pid-file"),
   408  		preserveFDs:     context.Int("preserve-fds"),
   409  		action:          action,
   410  		criuOpts:        criuOpts,
   411  		init:            true,
   412  	}
   413  	return r.run(spec.Process)
   414  }
   415  
   416  func setupPidfdSocket(process *libcontainer.Process, sockpath string) (_clean func(), _ error) {
   417  	linux530 := kernelversion.KernelVersion{Kernel: 5, Major: 3}
   418  	ok, err := kernelversion.GreaterEqualThan(linux530)
   419  	if err != nil {
   420  		return nil, err
   421  	}
   422  	if !ok {
   423  		return nil, fmt.Errorf("--pidfd-socket requires >= v5.3 kernel")
   424  	}
   425  
   426  	conn, err := net.Dial("unix", sockpath)
   427  	if err != nil {
   428  		return nil, fmt.Errorf("failed to dail %s: %w", sockpath, err)
   429  	}
   430  
   431  	uc, ok := conn.(*net.UnixConn)
   432  	if !ok {
   433  		conn.Close()
   434  		return nil, errors.New("failed to cast to UnixConn")
   435  	}
   436  
   437  	socket, err := uc.File()
   438  	if err != nil {
   439  		conn.Close()
   440  		return nil, fmt.Errorf("failed to dup socket: %w", err)
   441  	}
   442  
   443  	process.PidfdSocket = socket
   444  	return func() {
   445  		conn.Close()
   446  	}, nil
   447  }