github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/runsc/cmd/boot.go

github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/runsc/cmd/boot.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package cmd
    16  
    17  import (
    18  	"context"
    19  	"os"
    20  	"runtime/debug"
    21  	"strings"
    22  
    23  	"github.com/google/subcommands"
    24  	specs "github.com/opencontainers/runtime-spec/specs-go"
    25  	"golang.org/x/sys/unix"
    26  	"github.com/SagerNet/gvisor/pkg/log"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/platform"
    28  	"github.com/SagerNet/gvisor/runsc/boot"
    29  	"github.com/SagerNet/gvisor/runsc/config"
    30  	"github.com/SagerNet/gvisor/runsc/flag"
    31  	"github.com/SagerNet/gvisor/runsc/specutils"
    32  )
    33  
    34  // Boot implements subcommands.Command for the "boot" command which starts a
    35  // new sandbox. It should not be called directly.
    36  type Boot struct {
    37  	// bundleDir is the directory containing the OCI spec.
    38  	bundleDir string
    39  
    40  	// specFD is the file descriptor that the spec will be read from.
    41  	specFD int
    42  
    43  	// controllerFD is the file descriptor of a stream socket for the
    44  	// control server that is donated to this process.
    45  	controllerFD int
    46  
    47  	// deviceFD is the file descriptor for the platform device file.
    48  	deviceFD int
    49  
    50  	// ioFDs is the list of FDs used to connect to FS gofers.
    51  	ioFDs intFlags
    52  
    53  	// stdioFDs are the fds for stdin, stdout, and stderr. They must be
    54  	// provided in that order.
    55  	stdioFDs intFlags
    56  
    57  	// applyCaps determines if capabilities defined in the spec should be applied
    58  	// to the process.
    59  	applyCaps bool
    60  
    61  	// setUpChroot is set to true if the sandbox is started in an empty root.
    62  	setUpRoot bool
    63  
    64  	// cpuNum number of CPUs to create inside the sandbox.
    65  	cpuNum int
    66  
    67  	// totalMem sets the initial amount of total memory to report back to the
    68  	// container.
    69  	totalMem uint64
    70  
    71  	// userLogFD is the file descriptor to write user logs to.
    72  	userLogFD int
    73  
    74  	// startSyncFD is the file descriptor to synchronize runsc and sandbox.
    75  	startSyncFD int
    76  
    77  	// mountsFD is the file descriptor to read list of mounts after they have
    78  	// been resolved (direct paths, no symlinks). They are resolved outside the
    79  	// sandbox (e.g. gofer) and sent through this FD.
    80  	mountsFD int
    81  
    82  	// pidns is set if the sandbox is in its own pid namespace.
    83  	pidns bool
    84  
    85  	// attached is set to true to kill the sandbox process when the parent process
    86  	// terminates. This flag is set when the command execve's itself because
    87  	// parent death signal doesn't propagate through execve when uid/gid changes.
    88  	attached bool
    89  }
    90  
    91  // Name implements subcommands.Command.Name.
    92  func (*Boot) Name() string {
    93  	return "boot"
    94  }
    95  
    96  // Synopsis implements subcommands.Command.Synopsis.
    97  func (*Boot) Synopsis() string {
    98  	return "launch a sandbox process (internal use only)"
    99  }
   100  
   101  // Usage implements subcommands.Command.Usage.
   102  func (*Boot) Usage() string {
   103  	return `boot [flags] <container id>`
   104  }
   105  
   106  // SetFlags implements subcommands.Command.SetFlags.
   107  func (b *Boot) SetFlags(f *flag.FlagSet) {
   108  	f.StringVar(&b.bundleDir, "bundle", "", "required path to the root of the bundle directory")
   109  	f.IntVar(&b.specFD, "spec-fd", -1, "required fd with the container spec")
   110  	f.IntVar(&b.controllerFD, "controller-fd", -1, "required FD of a stream socket for the control server that must be donated to this process")
   111  	f.IntVar(&b.deviceFD, "device-fd", -1, "FD for the platform device file")
   112  	f.Var(&b.ioFDs, "io-fds", "list of FDs to connect 9P clients. They must follow this order: root first, then mounts as defined in the spec")
   113  	f.Var(&b.stdioFDs, "stdio-fds", "list of FDs containing sandbox stdin, stdout, and stderr in that order")
   114  	f.BoolVar(&b.applyCaps, "apply-caps", false, "if true, apply capabilities defined in the spec to the process")
   115  	f.BoolVar(&b.setUpRoot, "setup-root", false, "if true, set up an empty root for the process")
   116  	f.BoolVar(&b.pidns, "pidns", false, "if true, the sandbox is in its own PID namespace")
   117  	f.IntVar(&b.cpuNum, "cpu-num", 0, "number of CPUs to create inside the sandbox")
   118  	f.Uint64Var(&b.totalMem, "total-memory", 0, "sets the initial amount of total memory to report back to the container")
   119  	f.IntVar(&b.userLogFD, "user-log-fd", 0, "file descriptor to write user logs to. 0 means no logging.")
   120  	f.IntVar(&b.startSyncFD, "start-sync-fd", -1, "required FD to used to synchronize sandbox startup")
   121  	f.IntVar(&b.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to read list of mounts after they have been resolved (direct paths, no symlinks).")
   122  	f.BoolVar(&b.attached, "attached", false, "if attached is true, kills the sandbox process when the parent process terminates")
   123  }
   124  
   125  // Execute implements subcommands.Command.Execute.  It starts a sandbox in a
   126  // waiting state.
   127  func (b *Boot) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
   128  	if b.specFD == -1 || b.controllerFD == -1 || b.startSyncFD == -1 || f.NArg() != 1 {
   129  		f.Usage()
   130  		return subcommands.ExitUsageError
   131  	}
   132  
   133  	conf := args[0].(*config.Config)
   134  
   135  	// Set traceback level
   136  	debug.SetTraceback(conf.Traceback)
   137  
   138  	if b.attached {
   139  		// Ensure this process is killed after parent process terminates when
   140  		// attached mode is enabled. In the unfortunate event that the parent
   141  		// terminates before this point, this process leaks.
   142  		if err := unix.Prctl(unix.PR_SET_PDEATHSIG, uintptr(unix.SIGKILL), 0, 0, 0); err != nil {
   143  			Fatalf("error setting parent death signal: %v", err)
   144  		}
   145  	}
   146  
   147  	if b.setUpRoot {
   148  		if err := setUpChroot(b.pidns); err != nil {
   149  			Fatalf("error setting up chroot: %v", err)
   150  		}
   151  
   152  		if !b.applyCaps && !conf.Rootless {
   153  			// Remove --apply-caps arg to call myself. It has already been done.
   154  			args := prepareArgs(b.attached, "setup-root")
   155  
   156  			// Note that we've already read the spec from the spec FD, and
   157  			// we will read it again after the exec call. This works
   158  			// because the ReadSpecFromFile function seeks to the beginning
   159  			// of the file before reading.
   160  			Fatalf("callSelfAsNobody(%v): %v", args, callSelfAsNobody(args))
   161  			panic("unreachable")
   162  		}
   163  	}
   164  
   165  	// Get the spec from the specFD.
   166  	specFile := os.NewFile(uintptr(b.specFD), "spec file")
   167  	defer specFile.Close()
   168  	spec, err := specutils.ReadSpecFromFile(b.bundleDir, specFile, conf)
   169  	if err != nil {
   170  		Fatalf("reading spec: %v", err)
   171  	}
   172  	specutils.LogSpec(spec)
   173  
   174  	if b.applyCaps {
   175  		caps := spec.Process.Capabilities
   176  		if caps == nil {
   177  			caps = &specs.LinuxCapabilities{}
   178  		}
   179  
   180  		gPlatform, err := platform.Lookup(conf.Platform)
   181  		if err != nil {
   182  			Fatalf("loading platform: %v", err)
   183  		}
   184  		if gPlatform.Requirements().RequiresCapSysPtrace {
   185  			// Ptrace platform requires extra capabilities.
   186  			const c = "CAP_SYS_PTRACE"
   187  			caps.Bounding = append(caps.Bounding, c)
   188  			caps.Effective = append(caps.Effective, c)
   189  			caps.Permitted = append(caps.Permitted, c)
   190  		}
   191  
   192  		// Remove --apply-caps and --setup-root arg to call myself. Both have
   193  		// already been done.
   194  		args := prepareArgs(b.attached, "setup-root", "apply-caps")
   195  
   196  		// Note that we've already read the spec from the spec FD, and
   197  		// we will read it again after the exec call. This works
   198  		// because the ReadSpecFromFile function seeks to the beginning
   199  		// of the file before reading.
   200  		Fatalf("setCapsAndCallSelf(%v, %v): %v", args, caps, setCapsAndCallSelf(args, caps))
   201  		panic("unreachable")
   202  	}
   203  
   204  	// Read resolved mount list and replace the original one from the spec.
   205  	mountsFile := os.NewFile(uintptr(b.mountsFD), "mounts file")
   206  	cleanMounts, err := specutils.ReadMounts(mountsFile)
   207  	if err != nil {
   208  		mountsFile.Close()
   209  		Fatalf("Error reading mounts file: %v", err)
   210  	}
   211  	mountsFile.Close()
   212  	spec.Mounts = cleanMounts
   213  
   214  	// Create the loader.
   215  	bootArgs := boot.Args{
   216  		ID:           f.Arg(0),
   217  		Spec:         spec,
   218  		Conf:         conf,
   219  		ControllerFD: b.controllerFD,
   220  		Device:       os.NewFile(uintptr(b.deviceFD), "platform device"),
   221  		GoferFDs:     b.ioFDs.GetArray(),
   222  		StdioFDs:     b.stdioFDs.GetArray(),
   223  		NumCPU:       b.cpuNum,
   224  		TotalMem:     b.totalMem,
   225  		UserLogFD:    b.userLogFD,
   226  	}
   227  	l, err := boot.New(bootArgs)
   228  	if err != nil {
   229  		Fatalf("creating loader: %v", err)
   230  	}
   231  
   232  	// Fatalf exits the process and doesn't run defers.
   233  	// 'l' must be destroyed explicitly after this point!
   234  
   235  	// Notify the parent process the sandbox has booted (and that the controller
   236  	// is up).
   237  	startSyncFile := os.NewFile(uintptr(b.startSyncFD), "start-sync file")
   238  	buf := make([]byte, 1)
   239  	if w, err := startSyncFile.Write(buf); err != nil || w != 1 {
   240  		l.Destroy()
   241  		Fatalf("unable to write into the start-sync descriptor: %v", err)
   242  	}
   243  	// Closes startSyncFile because 'l.Run()' only returns when the sandbox exits.
   244  	startSyncFile.Close()
   245  
   246  	// Wait for the start signal from runsc.
   247  	l.WaitForStartSignal()
   248  
   249  	// Run the application and wait for it to finish.
   250  	if err := l.Run(); err != nil {
   251  		l.Destroy()
   252  		Fatalf("running sandbox: %v", err)
   253  	}
   254  
   255  	ws := l.WaitExit()
   256  	log.Infof("application exiting with %+v", ws)
   257  	waitStatus := args[1].(*unix.WaitStatus)
   258  	*waitStatus = unix.WaitStatus(ws.Status())
   259  	l.Destroy()
   260  	return subcommands.ExitSuccess
   261  }
   262  
   263  func prepareArgs(attached bool, exclude ...string) []string {
   264  	var args []string
   265  	for _, arg := range os.Args {
   266  		for _, excl := range exclude {
   267  			if strings.Contains(arg, excl) {
   268  				goto skip
   269  			}
   270  		}
   271  		args = append(args, arg)
   272  		if attached && arg == "boot" {
   273  			// Strategicaly place "--attached" after the command. This is needed
   274  			// to ensure the new process is killed when the parent process terminates.
   275  			args = append(args, "--attached")
   276  		}
   277  	skip:
   278  	}
   279  	return args
   280  }