github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/runsc/cmd/exec.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package cmd
    16  
    17  import (
    18  	"context"
    19  	"encoding/json"
    20  	"fmt"
    21  	"io/ioutil"
    22  	"os"
    23  	"os/exec"
    24  	"path/filepath"
    25  	"strconv"
    26  	"strings"
    27  	"time"
    28  
    29  	"github.com/google/subcommands"
    30  	specs "github.com/opencontainers/runtime-spec/specs-go"
    31  	"golang.org/x/sys/unix"
    32  	"github.com/metacubex/gvisor/pkg/log"
    33  	"github.com/metacubex/gvisor/pkg/sentry/control"
    34  	"github.com/metacubex/gvisor/pkg/sentry/kernel/auth"
    35  	"github.com/metacubex/gvisor/runsc/cmd/util"
    36  	"github.com/metacubex/gvisor/runsc/config"
    37  	"github.com/metacubex/gvisor/runsc/console"
    38  	"github.com/metacubex/gvisor/runsc/container"
    39  	"github.com/metacubex/gvisor/runsc/flag"
    40  	"github.com/metacubex/gvisor/runsc/specutils"
    41  )
    42  
    43  // Exec implements subcommands.Command for the "exec" command.
    44  type Exec struct {
    45  	cwd string
    46  	env stringSlice
    47  	// user contains the UID and GID with which to run the new process.
    48  	user            user
    49  	extraKGIDs      stringSlice
    50  	caps            stringSlice
    51  	detach          bool
    52  	processPath     string
    53  	pidFile         string
    54  	internalPidFile string
    55  
    56  	// consoleSocket is the path to an AF_UNIX socket which will receive a
    57  	// file descriptor referencing the master end of the console's
    58  	// pseudoterminal.
    59  	consoleSocket string
    60  
    61  	// passFDs are user-supplied FDs from the host to be exposed to the
    62  	// sandboxed app.
    63  	passFDs fdMappings
    64  
    65  	// execFD is the host file descriptor used for program execution.
    66  	execFD int
    67  }
    68  
    69  // Name implements subcommands.Command.Name.
    70  func (*Exec) Name() string {
    71  	return "exec"
    72  }
    73  
    74  // Synopsis implements subcommands.Command.Synopsis.
    75  func (*Exec) Synopsis() string {
    76  	return "execute new process inside the container"
    77  }
    78  
    79  // Usage implements subcommands.Command.Usage.
    80  func (*Exec) Usage() string {
    81  	return `exec [command options] <container-id> <command> [command options] || --process process.json <container-id>
    82  
    83  
    84  Where "<container-id>" is the name for the instance of the container and
    85  "<command>" is the command to be executed in the container.
    86  "<command>" can't be empty unless a "-process" flag provided.
    87  
    88  EXAMPLE:
    89  If the container is configured to run /bin/ps the following will
    90  output a list of processes running in the container:
    91  
    92         # runc exec <container-id> ps
    93  
    94  OPTIONS:
    95  `
    96  }
    97  
    98  // SetFlags implements subcommands.Command.SetFlags.
    99  func (ex *Exec) SetFlags(f *flag.FlagSet) {
   100  	f.StringVar(&ex.cwd, "cwd", "", "current working directory")
   101  	f.Var(&ex.env, "env", "set environment variables (e.g. '-env PATH=/bin -env TERM=xterm')")
   102  	f.Var(&ex.user, "user", "UID (format: <uid>[:<gid>])")
   103  	f.Var(&ex.extraKGIDs, "additional-gids", "additional gids")
   104  	f.Var(&ex.caps, "cap", "add a capability to the bounding set for the process")
   105  	f.BoolVar(&ex.detach, "detach", false, "detach from the container's process")
   106  	f.StringVar(&ex.processPath, "process", "", "path to the process.json")
   107  	f.StringVar(&ex.pidFile, "pid-file", "", "filename that the container pid will be written to")
   108  	f.StringVar(&ex.internalPidFile, "internal-pid-file", "", "filename that the container-internal pid will be written to")
   109  	f.StringVar(&ex.consoleSocket, "console-socket", "", "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal")
   110  	f.Var(&ex.passFDs, "pass-fd", "file descriptor passed to the container in M:N format, where M is the host and N is the guest descriptor (can be supplied multiple times)")
   111  	f.IntVar(&ex.execFD, "exec-fd", -1, "host file descriptor used for program execution")
   112  }
   113  
   114  // Execute implements subcommands.Command.Execute. It starts a process in an
   115  // already created container.
   116  func (ex *Exec) Execute(_ context.Context, f *flag.FlagSet, args ...any) subcommands.ExitStatus {
   117  	conf := args[0].(*config.Config)
   118  	e, id, err := ex.parseArgs(f, conf.EnableRaw)
   119  	if err != nil {
   120  		util.Fatalf("parsing process spec: %v", err)
   121  	}
   122  	waitStatus := args[1].(*unix.WaitStatus)
   123  
   124  	c, err := container.Load(conf.RootDir, container.FullID{ContainerID: id}, container.LoadOpts{})
   125  	if err != nil {
   126  		util.Fatalf("loading sandbox: %v", err)
   127  	}
   128  
   129  	log.Debugf("Exec arguments: %+v", e)
   130  	log.Debugf("Exec capabilities: %+v", e.Capabilities)
   131  
   132  	// Replace empty settings with defaults from container.
   133  	if e.WorkingDirectory == "" {
   134  		e.WorkingDirectory = c.Spec.Process.Cwd
   135  	}
   136  	if e.Envv == nil {
   137  		e.Envv, err = specutils.ResolveEnvs(c.Spec.Process.Env, ex.env)
   138  		if err != nil {
   139  			util.Fatalf("getting environment variables: %v", err)
   140  		}
   141  	}
   142  
   143  	if e.Capabilities == nil {
   144  		e.Capabilities, err = specutils.Capabilities(conf.EnableRaw, c.Spec.Process.Capabilities)
   145  		if err != nil {
   146  			util.Fatalf("creating capabilities: %v", err)
   147  		}
   148  		log.Infof("Using exec capabilities from container: %+v", e.Capabilities)
   149  	}
   150  
   151  	// Create the file descriptor map for the process in the container.
   152  	fdMap := map[int]*os.File{
   153  		0: os.Stdin,
   154  		1: os.Stdout,
   155  		2: os.Stderr,
   156  	}
   157  
   158  	// Add custom file descriptors to the map.
   159  	for _, mapping := range ex.passFDs {
   160  		file := os.NewFile(uintptr(mapping.Host), "")
   161  		if file == nil {
   162  			util.Fatalf("failed to create file from file descriptor %d", mapping.Host)
   163  		}
   164  		fdMap[mapping.Guest] = file
   165  	}
   166  
   167  	var execFile *os.File
   168  	if ex.execFD >= 0 {
   169  		execFile = os.NewFile(uintptr(ex.execFD), "exec-fd")
   170  	}
   171  
   172  	// Close the underlying file descriptors after we have passed them.
   173  	defer func() {
   174  		for _, file := range fdMap {
   175  			fd := file.Fd()
   176  			if file.Close() != nil {
   177  				log.Debugf("Failed to close FD %d", fd)
   178  			}
   179  		}
   180  
   181  		if execFile != nil && execFile.Close() != nil {
   182  			log.Debugf("Failed to close exec FD")
   183  		}
   184  	}()
   185  
   186  	e.FilePayload = control.NewFilePayload(fdMap, execFile)
   187  
   188  	// containerd expects an actual process to represent the container being
   189  	// executed. If detach was specified, starts a child in non-detach mode,
   190  	// write the child's PID to the pid file. So when the container returns, the
   191  	// child process will also return and signal containerd.
   192  	if ex.detach {
   193  		return ex.execChildAndWait(waitStatus)
   194  	}
   195  	return ex.exec(conf, c, e, waitStatus)
   196  }
   197  
   198  func (ex *Exec) exec(conf *config.Config, c *container.Container, e *control.ExecArgs, waitStatus *unix.WaitStatus) subcommands.ExitStatus {
   199  	// Start the new process and get its pid.
   200  	pid, err := c.Execute(conf, e)
   201  	if err != nil {
   202  		return util.Errorf("executing processes for container: %v", err)
   203  	}
   204  
   205  	if e.StdioIsPty {
   206  		// Forward signals sent to this process to the foreground
   207  		// process in the sandbox.
   208  		stopForwarding := c.ForwardSignals(pid, true /* fgProcess */)
   209  		defer stopForwarding()
   210  	}
   211  
   212  	// Write the sandbox-internal pid if required.
   213  	if ex.internalPidFile != "" {
   214  		pidStr := []byte(strconv.Itoa(int(pid)))
   215  		if err := ioutil.WriteFile(ex.internalPidFile, pidStr, 0644); err != nil {
   216  			return util.Errorf("writing internal pid file %q: %v", ex.internalPidFile, err)
   217  		}
   218  	}
   219  
   220  	// Generate the pid file after the internal pid file is generated, so that
   221  	// users can safely assume that the internal pid file is ready after
   222  	// `runsc exec -d` returns.
   223  	if ex.pidFile != "" {
   224  		if err := ioutil.WriteFile(ex.pidFile, []byte(strconv.Itoa(os.Getpid())), 0644); err != nil {
   225  			return util.Errorf("writing pid file: %v", err)
   226  		}
   227  	}
   228  
   229  	// Wait for the process to exit.
   230  	ws, err := c.WaitPID(pid)
   231  	if err != nil {
   232  		return util.Errorf("waiting on pid %d: %v", pid, err)
   233  	}
   234  	*waitStatus = ws
   235  	return subcommands.ExitSuccess
   236  }
   237  
   238  func (ex *Exec) execChildAndWait(waitStatus *unix.WaitStatus) subcommands.ExitStatus {
   239  	var args []string
   240  	for _, a := range os.Args[1:] {
   241  		if !strings.Contains(a, "detach") {
   242  			args = append(args, a)
   243  		}
   244  	}
   245  
   246  	// The command needs to write a pid file so that execChildAndWait can tell
   247  	// when it has started. If no pid-file was provided, we should use a
   248  	// filename in a temp directory.
   249  	pidFile := ex.pidFile
   250  	if pidFile == "" {
   251  		tmpDir, err := ioutil.TempDir("", "exec-pid-")
   252  		if err != nil {
   253  			util.Fatalf("creating TempDir: %v", err)
   254  		}
   255  		defer os.RemoveAll(tmpDir)
   256  		pidFile = filepath.Join(tmpDir, "pid")
   257  		args = append(args, "--pid-file="+pidFile)
   258  	}
   259  
   260  	cmd := exec.Command(specutils.ExePath, args...)
   261  	cmd.Args[0] = "runsc-exec"
   262  
   263  	// Exec stdio defaults to current process stdio.
   264  	cmd.Stdin = os.Stdin
   265  	cmd.Stdout = os.Stdout
   266  	cmd.Stderr = os.Stderr
   267  
   268  	// If the console control socket file is provided, then create a new
   269  	// pty master/replica pair and set the TTY on the sandbox process.
   270  	if ex.consoleSocket != "" {
   271  		// Create a new TTY pair and send the master on the provided socket.
   272  		tty, err := console.NewWithSocket(ex.consoleSocket)
   273  		if err != nil {
   274  			util.Fatalf("setting up console with socket %q: %v", ex.consoleSocket, err)
   275  		}
   276  		defer tty.Close()
   277  
   278  		// Set stdio to the new TTY replica.
   279  		cmd.Stdin = tty
   280  		cmd.Stdout = tty
   281  		cmd.Stderr = tty
   282  		cmd.SysProcAttr = &unix.SysProcAttr{
   283  			Setsid:  true,
   284  			Setctty: true,
   285  			// The Ctty FD must be the FD in the child process's FD
   286  			// table. Since we set cmd.Stdin/Stdout/Stderr to the
   287  			// tty FD, we can use any of 0, 1, or 2 here.
   288  			// See https://github.com/golang/go/issues/29458.
   289  			Ctty: 0,
   290  		}
   291  	}
   292  
   293  	if err := cmd.Start(); err != nil {
   294  		util.Fatalf("failure to start child exec process, err: %v", err)
   295  	}
   296  
   297  	log.Infof("Started child (PID: %d) to exec and wait: %s %s", cmd.Process.Pid, specutils.ExePath, args)
   298  
   299  	// Wait for PID file to ensure that child process has started. Otherwise,
   300  	// '--process' file is deleted as soon as this process returns and the child
   301  	// may fail to read it.
   302  	ready := func() (bool, error) {
   303  		pidb, err := ioutil.ReadFile(pidFile)
   304  		if err == nil {
   305  			// File appeared, check whether pid is fully written.
   306  			pid, err := strconv.Atoi(string(pidb))
   307  			if err != nil {
   308  				return false, nil
   309  			}
   310  			return pid == cmd.Process.Pid, nil
   311  		}
   312  		if pe, ok := err.(*os.PathError); !ok || pe.Err != unix.ENOENT {
   313  			return false, err
   314  		}
   315  		// No file yet, continue to wait...
   316  		return false, nil
   317  	}
   318  	if err := specutils.WaitForReady(cmd.Process.Pid, 10*time.Second, ready); err != nil {
   319  		// Don't log fatal error here, otherwise it will override the error logged
   320  		// by the child process that has failed to start.
   321  		log.Warningf("Unexpected error waiting for PID file, err: %v", err)
   322  		return subcommands.ExitFailure
   323  	}
   324  
   325  	*waitStatus = 0
   326  	return subcommands.ExitSuccess
   327  }
   328  
   329  // parseArgs parses exec information from the command line or a JSON file
   330  // depending on whether the --process flag was used. Returns an ExecArgs and
   331  // the ID of the container to be used.
   332  func (ex *Exec) parseArgs(f *flag.FlagSet, enableRaw bool) (*control.ExecArgs, string, error) {
   333  	if ex.processPath == "" {
   334  		// Requires at least a container ID and command.
   335  		if f.NArg() < 2 {
   336  			f.Usage()
   337  			return nil, "", fmt.Errorf("both a container-id and command are required")
   338  		}
   339  		e, err := ex.argsFromCLI(f.Args()[1:], enableRaw)
   340  		return e, f.Arg(0), err
   341  	}
   342  	// Requires only the container ID.
   343  	if f.NArg() != 1 {
   344  		f.Usage()
   345  		return nil, "", fmt.Errorf("a container-id is required")
   346  	}
   347  	e, err := ex.argsFromProcessFile(enableRaw)
   348  	return e, f.Arg(0), err
   349  }
   350  
   351  func (ex *Exec) argsFromCLI(argv []string, enableRaw bool) (*control.ExecArgs, error) {
   352  	extraKGIDs := make([]auth.KGID, 0, len(ex.extraKGIDs))
   353  	for _, s := range ex.extraKGIDs {
   354  		kgid, err := strconv.Atoi(s)
   355  		if err != nil {
   356  			util.Fatalf("parsing GID: %s, %v", s, err)
   357  		}
   358  		extraKGIDs = append(extraKGIDs, auth.KGID(kgid))
   359  	}
   360  
   361  	var caps *auth.TaskCapabilities
   362  	if len(ex.caps) > 0 {
   363  		var err error
   364  		caps, err = capabilities(ex.caps, enableRaw)
   365  		if err != nil {
   366  			return nil, fmt.Errorf("capabilities error: %v", err)
   367  		}
   368  	}
   369  
   370  	return &control.ExecArgs{
   371  		Argv:             argv,
   372  		WorkingDirectory: ex.cwd,
   373  		KUID:             ex.user.kuid,
   374  		KGID:             ex.user.kgid,
   375  		ExtraKGIDs:       extraKGIDs,
   376  		Capabilities:     caps,
   377  		StdioIsPty:       ex.consoleSocket != "" || console.IsPty(os.Stdin.Fd()),
   378  		FilePayload: control.NewFilePayload(map[int]*os.File{
   379  			0: os.Stdin,
   380  			1: os.Stdout,
   381  			2: os.Stderr,
   382  		}, nil),
   383  	}, nil
   384  }
   385  
   386  func (ex *Exec) argsFromProcessFile(enableRaw bool) (*control.ExecArgs, error) {
   387  	f, err := os.Open(ex.processPath)
   388  	if err != nil {
   389  		return nil, fmt.Errorf("error opening process file: %s, %v", ex.processPath, err)
   390  	}
   391  	defer f.Close()
   392  	var p specs.Process
   393  	if err := json.NewDecoder(f).Decode(&p); err != nil {
   394  		return nil, fmt.Errorf("error parsing process file: %s, %v", ex.processPath, err)
   395  	}
   396  	return argsFromProcess(&p, enableRaw)
   397  }
   398  
   399  // argsFromProcess performs all the non-IO conversion from the Process struct
   400  // to ExecArgs.
   401  func argsFromProcess(p *specs.Process, enableRaw bool) (*control.ExecArgs, error) {
   402  	// Create capabilities.
   403  	var caps *auth.TaskCapabilities
   404  	if p.Capabilities != nil {
   405  		var err error
   406  		// Starting from Docker 19, capabilities are explicitly set for exec (instead
   407  		// of nil like before). So we can't distinguish 'exec' from
   408  		// 'exec --privileged', as both specify CAP_NET_RAW. Therefore, filter
   409  		// CAP_NET_RAW in the same way as container start.
   410  		caps, err = specutils.Capabilities(enableRaw, p.Capabilities)
   411  		if err != nil {
   412  			return nil, fmt.Errorf("error creating capabilities: %v", err)
   413  		}
   414  	}
   415  
   416  	// Convert the spec's additional GIDs to KGIDs.
   417  	extraKGIDs := make([]auth.KGID, 0, len(p.User.AdditionalGids))
   418  	for _, GID := range p.User.AdditionalGids {
   419  		extraKGIDs = append(extraKGIDs, auth.KGID(GID))
   420  	}
   421  
   422  	return &control.ExecArgs{
   423  		Argv:             p.Args,
   424  		Envv:             p.Env,
   425  		WorkingDirectory: p.Cwd,
   426  		KUID:             auth.KUID(p.User.UID),
   427  		KGID:             auth.KGID(p.User.GID),
   428  		ExtraKGIDs:       extraKGIDs,
   429  		Capabilities:     caps,
   430  		StdioIsPty:       p.Terminal,
   431  		FilePayload: control.NewFilePayload(map[int]*os.File{
   432  			0: os.Stdin,
   433  			1: os.Stdout,
   434  			2: os.Stderr,
   435  		}, nil),
   436  	}, nil
   437  }
   438  
   439  // capabilities takes a list of capabilities as strings and returns an
   440  // auth.TaskCapabilities struct with those capabilities in every capability set.
   441  // This mimics runc's behavior.
   442  func capabilities(cs []string, enableRaw bool) (*auth.TaskCapabilities, error) {
   443  	var specCaps specs.LinuxCapabilities
   444  	for _, cap := range cs {
   445  		specCaps.Ambient = append(specCaps.Ambient, cap)
   446  		specCaps.Bounding = append(specCaps.Bounding, cap)
   447  		specCaps.Effective = append(specCaps.Effective, cap)
   448  		specCaps.Inheritable = append(specCaps.Inheritable, cap)
   449  		specCaps.Permitted = append(specCaps.Permitted, cap)
   450  	}
   451  	// Starting from Docker 19, capabilities are explicitly set for exec (instead
   452  	// of nil like before). So we can't distinguish 'exec' from
   453  	// 'exec --privileged', as both specify CAP_NET_RAW. Therefore, filter
   454  	// CAP_NET_RAW in the same way as container start.
   455  	return specutils.Capabilities(enableRaw, &specCaps)
   456  }
   457  
   458  // stringSlice allows a flag to be used multiple times, where each occurrence
   459  // adds a value to the flag. For example, a flag called "x" could be invoked
   460  // via "runsc exec -x foo -x bar", and the corresponding stringSlice would be
   461  // {"x", "y"}.
   462  type stringSlice []string
   463  
   464  // String implements flag.Value.String.
   465  func (ss *stringSlice) String() string {
   466  	return strings.Join(*ss, ",")
   467  }
   468  
   469  // Get implements flag.Value.Get.
   470  func (ss *stringSlice) Get() any {
   471  	return ss
   472  }
   473  
   474  // Set implements flag.Value.Set. Set(String()) should be idempotent.
   475  func (ss *stringSlice) Set(s string) error {
   476  	*ss = append(*ss, strings.Split(s, ",")...)
   477  	return nil
   478  }
   479  
   480  // user allows -user to convey a UID and, optionally, a GID separated by a
   481  // colon.
   482  type user struct {
   483  	kuid auth.KUID
   484  	kgid auth.KGID
   485  }
   486  
   487  // String implements flag.Value.String.
   488  func (u *user) String() string {
   489  	return fmt.Sprintf("%d:%d", u.kuid, u.kgid)
   490  }
   491  
   492  // Get implements flag.Value.Get.
   493  func (u *user) Get() any {
   494  	return u
   495  }
   496  
   497  // Set implements flag.Value.Set. Set(String()) should be idempotent.
   498  func (u *user) Set(s string) error {
   499  	parts := strings.SplitN(s, ":", 2)
   500  	kuid, err := strconv.Atoi(parts[0])
   501  	if err != nil {
   502  		return fmt.Errorf("couldn't parse UID: %s", parts[0])
   503  	}
   504  	u.kuid = auth.KUID(kuid)
   505  	if len(parts) > 1 {
   506  		kgid, err := strconv.Atoi(parts[1])
   507  		if err != nil {
   508  			return fmt.Errorf("couldn't parse GID: %s", parts[1])
   509  		}
   510  		u.kgid = auth.KGID(kgid)
   511  	}
   512  	return nil
   513  }