github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/runsc/cmd/gofer.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package cmd
    16  
    17  import (
    18  	"context"
    19  	"encoding/json"
    20  	"fmt"
    21  	"os"
    22  	"path/filepath"
    23  	"strings"
    24  
    25  	"github.com/google/subcommands"
    26  	specs "github.com/opencontainers/runtime-spec/specs-go"
    27  	"golang.org/x/sys/unix"
    28  	"github.com/SagerNet/gvisor/pkg/log"
    29  	"github.com/SagerNet/gvisor/pkg/p9"
    30  	"github.com/SagerNet/gvisor/pkg/sync"
    31  	"github.com/SagerNet/gvisor/pkg/unet"
    32  	"github.com/SagerNet/gvisor/runsc/config"
    33  	"github.com/SagerNet/gvisor/runsc/flag"
    34  	"github.com/SagerNet/gvisor/runsc/fsgofer"
    35  	"github.com/SagerNet/gvisor/runsc/fsgofer/filter"
    36  	"github.com/SagerNet/gvisor/runsc/specutils"
    37  )
    38  
    39  var caps = []string{
    40  	"CAP_CHOWN",
    41  	"CAP_DAC_OVERRIDE",
    42  	"CAP_DAC_READ_SEARCH",
    43  	"CAP_FOWNER",
    44  	"CAP_FSETID",
    45  	"CAP_SYS_CHROOT",
    46  }
    47  
    48  // goferCaps is the minimal set of capabilities needed by the Gofer to operate
    49  // on files.
    50  var goferCaps = &specs.LinuxCapabilities{
    51  	Bounding:  caps,
    52  	Effective: caps,
    53  	Permitted: caps,
    54  }
    55  
    56  // Gofer implements subcommands.Command for the "gofer" command, which starts a
    57  // filesystem gofer.  This command should not be called directly.
    58  type Gofer struct {
    59  	bundleDir string
    60  	ioFDs     intFlags
    61  	applyCaps bool
    62  	setUpRoot bool
    63  
    64  	specFD   int
    65  	mountsFD int
    66  }
    67  
    68  // Name implements subcommands.Command.
    69  func (*Gofer) Name() string {
    70  	return "gofer"
    71  }
    72  
    73  // Synopsis implements subcommands.Command.
    74  func (*Gofer) Synopsis() string {
    75  	return "launch a gofer process that serves files over 9P protocol (internal use only)"
    76  }
    77  
    78  // Usage implements subcommands.Command.
    79  func (*Gofer) Usage() string {
    80  	return `gofer [flags]`
    81  }
    82  
    83  // SetFlags implements subcommands.Command.
    84  func (g *Gofer) SetFlags(f *flag.FlagSet) {
    85  	f.StringVar(&g.bundleDir, "bundle", "", "path to the root of the bundle directory, defaults to the current directory")
    86  	f.Var(&g.ioFDs, "io-fds", "list of FDs to connect 9P servers. They must follow this order: root first, then mounts as defined in the spec")
    87  	f.BoolVar(&g.applyCaps, "apply-caps", true, "if true, apply capabilities to restrict what the Gofer process can do")
    88  	f.BoolVar(&g.setUpRoot, "setup-root", true, "if true, set up an empty root for the process")
    89  	f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec")
    90  	f.IntVar(&g.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to write list of mounts after they have been resolved (direct paths, no symlinks).")
    91  }
    92  
    93  // Execute implements subcommands.Command.
    94  func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
    95  	if g.bundleDir == "" || len(g.ioFDs) < 1 || g.specFD < 0 {
    96  		f.Usage()
    97  		return subcommands.ExitUsageError
    98  	}
    99  
   100  	conf := args[0].(*config.Config)
   101  
   102  	specFile := os.NewFile(uintptr(g.specFD), "spec file")
   103  	defer specFile.Close()
   104  	spec, err := specutils.ReadSpecFromFile(g.bundleDir, specFile, conf)
   105  	if err != nil {
   106  		Fatalf("reading spec: %v", err)
   107  	}
   108  
   109  	if g.setUpRoot {
   110  		if err := setupRootFS(spec, conf); err != nil {
   111  			Fatalf("Error setting up root FS: %v", err)
   112  		}
   113  	}
   114  	if g.applyCaps {
   115  		// Disable caps when calling myself again.
   116  		// Note: minimal argument handling for the default case to keep it simple.
   117  		args := os.Args
   118  		args = append(args, "--apply-caps=false", "--setup-root=false")
   119  		Fatalf("setCapsAndCallSelf(%v, %v): %v", args, goferCaps, setCapsAndCallSelf(args, goferCaps))
   120  		panic("unreachable")
   121  	}
   122  
   123  	// Find what path is going to be served by this gofer.
   124  	root := spec.Root.Path
   125  	if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
   126  		root = "/root"
   127  	}
   128  
   129  	// Resolve mount points paths, then replace mounts from our spec and send the
   130  	// mount list over to the sandbox, so they are both in sync.
   131  	//
   132  	// Note that all mount points have been mounted in the proper location in
   133  	// setupRootFS().
   134  	cleanMounts, err := resolveMounts(conf, spec.Mounts, root)
   135  	if err != nil {
   136  		Fatalf("Failure to resolve mounts: %v", err)
   137  	}
   138  	spec.Mounts = cleanMounts
   139  	go func() {
   140  		if err := g.writeMounts(cleanMounts); err != nil {
   141  			panic(fmt.Sprintf("Failed to write mounts: %v", err))
   142  		}
   143  	}()
   144  
   145  	specutils.LogSpec(spec)
   146  
   147  	// fsgofer should run with a umask of 0, because we want to preserve file
   148  	// modes exactly as sent by the sandbox, which will have applied its own umask.
   149  	unix.Umask(0)
   150  
   151  	if err := fsgofer.OpenProcSelfFD(); err != nil {
   152  		Fatalf("failed to open /proc/self/fd: %v", err)
   153  	}
   154  
   155  	if err := unix.Chroot(root); err != nil {
   156  		Fatalf("failed to chroot to %q: %v", root, err)
   157  	}
   158  	if err := unix.Chdir("/"); err != nil {
   159  		Fatalf("changing working dir: %v", err)
   160  	}
   161  	log.Infof("Process chroot'd to %q", root)
   162  
   163  	// Start with root mount, then add any other additional mount as needed.
   164  	ats := make([]p9.Attacher, 0, len(spec.Mounts)+1)
   165  	ap, err := fsgofer.NewAttachPoint("/", fsgofer.Config{
   166  		ROMount:           spec.Root.Readonly || conf.Overlay,
   167  		EnableVerityXattr: conf.Verity,
   168  	})
   169  	if err != nil {
   170  		Fatalf("creating attach point: %v", err)
   171  	}
   172  	ats = append(ats, ap)
   173  	log.Infof("Serving %q mapped to %q on FD %d (ro: %t)", "/", root, g.ioFDs[0], spec.Root.Readonly)
   174  
   175  	mountIdx := 1 // first one is the root
   176  	for _, m := range spec.Mounts {
   177  		if specutils.Is9PMount(m, conf.VFS2) {
   178  			cfg := fsgofer.Config{
   179  				ROMount:           isReadonlyMount(m.Options) || conf.Overlay,
   180  				HostUDS:           conf.FSGoferHostUDS,
   181  				EnableVerityXattr: conf.Verity,
   182  			}
   183  			ap, err := fsgofer.NewAttachPoint(m.Destination, cfg)
   184  			if err != nil {
   185  				Fatalf("creating attach point: %v", err)
   186  			}
   187  			ats = append(ats, ap)
   188  
   189  			if mountIdx >= len(g.ioFDs) {
   190  				Fatalf("no FD found for mount. Did you forget --io-fd? mount: %d, %v", len(g.ioFDs), m)
   191  			}
   192  			log.Infof("Serving %q mapped on FD %d (ro: %t)", m.Destination, g.ioFDs[mountIdx], cfg.ROMount)
   193  			mountIdx++
   194  		}
   195  	}
   196  	if mountIdx != len(g.ioFDs) {
   197  		Fatalf("too many FDs passed for mounts. mounts: %d, FDs: %d", mountIdx, len(g.ioFDs))
   198  	}
   199  
   200  	if conf.FSGoferHostUDS {
   201  		filter.InstallUDSFilters()
   202  	}
   203  
   204  	if conf.Verity {
   205  		filter.InstallXattrFilters()
   206  	}
   207  
   208  	if err := filter.Install(); err != nil {
   209  		Fatalf("installing seccomp filters: %v", err)
   210  	}
   211  
   212  	runServers(ats, g.ioFDs)
   213  	return subcommands.ExitSuccess
   214  }
   215  
   216  func runServers(ats []p9.Attacher, ioFDs []int) {
   217  	// Run the loops and wait for all to exit.
   218  	var wg sync.WaitGroup
   219  	for i, ioFD := range ioFDs {
   220  		wg.Add(1)
   221  		go func(ioFD int, at p9.Attacher) {
   222  			socket, err := unet.NewSocket(ioFD)
   223  			if err != nil {
   224  				Fatalf("creating server on FD %d: %v", ioFD, err)
   225  			}
   226  			s := p9.NewServer(at)
   227  			if err := s.Handle(socket); err != nil {
   228  				Fatalf("P9 server returned error. Gofer is shutting down. FD: %d, err: %v", ioFD, err)
   229  			}
   230  			wg.Done()
   231  		}(ioFD, ats[i])
   232  	}
   233  	wg.Wait()
   234  	log.Infof("All 9P servers exited.")
   235  }
   236  
   237  func (g *Gofer) writeMounts(mounts []specs.Mount) error {
   238  	bytes, err := json.Marshal(mounts)
   239  	if err != nil {
   240  		return err
   241  	}
   242  
   243  	f := os.NewFile(uintptr(g.mountsFD), "mounts file")
   244  	defer f.Close()
   245  
   246  	for written := 0; written < len(bytes); {
   247  		w, err := f.Write(bytes[written:])
   248  		if err != nil {
   249  			return err
   250  		}
   251  		written += w
   252  	}
   253  	return nil
   254  }
   255  
   256  func isReadonlyMount(opts []string) bool {
   257  	for _, o := range opts {
   258  		if o == "ro" {
   259  			return true
   260  		}
   261  	}
   262  	return false
   263  }
   264  
   265  func setupRootFS(spec *specs.Spec, conf *config.Config) error {
   266  	// Convert all shared mounts into slaves to be sure that nothing will be
   267  	// propagated outside of our namespace.
   268  	procPath := "/proc"
   269  	if err := specutils.SafeMount("", "/", "", unix.MS_SLAVE|unix.MS_REC, "", procPath); err != nil {
   270  		Fatalf("error converting mounts: %v", err)
   271  	}
   272  
   273  	root := spec.Root.Path
   274  	if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
   275  		// runsc can't be re-executed without /proc, so we create a tmpfs mount,
   276  		// mount ./proc and ./root there, then move this mount to the root and after
   277  		// setCapsAndCallSelf, runsc will chroot into /root.
   278  		//
   279  		// We need a directory to construct a new root and we know that
   280  		// runsc can't start without /proc, so we can use it for this.
   281  		flags := uintptr(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC)
   282  		if err := specutils.SafeMount("runsc-root", "/proc", "tmpfs", flags, "", procPath); err != nil {
   283  			Fatalf("error mounting tmpfs: %v", err)
   284  		}
   285  
   286  		// Prepare tree structure for pivot_root(2).
   287  		os.Mkdir("/proc/proc", 0755)
   288  		os.Mkdir("/proc/root", 0755)
   289  		// This cannot use SafeMount because there's no available procfs. But we
   290  		// know that /proc is an empty tmpfs mount, so this is safe.
   291  		if err := unix.Mount("runsc-proc", "/proc/proc", "proc", flags|unix.MS_RDONLY, ""); err != nil {
   292  			Fatalf("error mounting proc: %v", err)
   293  		}
   294  		root = "/proc/root"
   295  		procPath = "/proc/proc"
   296  	}
   297  
   298  	// Mount root path followed by submounts.
   299  	if err := specutils.SafeMount(spec.Root.Path, root, "bind", unix.MS_BIND|unix.MS_REC, "", procPath); err != nil {
   300  		return fmt.Errorf("mounting root on root (%q) err: %v", root, err)
   301  	}
   302  
   303  	flags := uint32(unix.MS_SLAVE | unix.MS_REC)
   304  	if spec.Linux != nil && spec.Linux.RootfsPropagation != "" {
   305  		flags = specutils.PropOptionsToFlags([]string{spec.Linux.RootfsPropagation})
   306  	}
   307  	if err := specutils.SafeMount("", root, "", uintptr(flags), "", procPath); err != nil {
   308  		return fmt.Errorf("mounting root (%q) with flags: %#x, err: %v", root, flags, err)
   309  	}
   310  
   311  	// Replace the current spec, with the clean spec with symlinks resolved.
   312  	if err := setupMounts(conf, spec.Mounts, root, procPath); err != nil {
   313  		Fatalf("error setting up FS: %v", err)
   314  	}
   315  
   316  	// Create working directory if needed.
   317  	if spec.Process.Cwd != "" {
   318  		dst, err := resolveSymlinks(root, spec.Process.Cwd)
   319  		if err != nil {
   320  			return fmt.Errorf("resolving symlinks to %q: %v", spec.Process.Cwd, err)
   321  		}
   322  		log.Infof("Create working directory %q if needed", spec.Process.Cwd)
   323  		if err := os.MkdirAll(dst, 0755); err != nil {
   324  			return fmt.Errorf("creating working directory %q: %v", spec.Process.Cwd, err)
   325  		}
   326  	}
   327  
   328  	// Check if root needs to be remounted as readonly.
   329  	if spec.Root.Readonly || conf.Overlay {
   330  		// If root is a mount point but not read-only, we can change mount options
   331  		// to make it read-only for extra safety.
   332  		log.Infof("Remounting root as readonly: %q", root)
   333  		flags := uintptr(unix.MS_BIND | unix.MS_REMOUNT | unix.MS_RDONLY | unix.MS_REC)
   334  		if err := specutils.SafeMount(root, root, "bind", flags, "", procPath); err != nil {
   335  			return fmt.Errorf("remounting root as read-only with source: %q, target: %q, flags: %#x, err: %v", root, root, flags, err)
   336  		}
   337  	}
   338  
   339  	if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
   340  		if err := pivotRoot("/proc"); err != nil {
   341  			Fatalf("failed to change the root file system: %v", err)
   342  		}
   343  		if err := os.Chdir("/"); err != nil {
   344  			Fatalf("failed to change working directory")
   345  		}
   346  	}
   347  	return nil
   348  }
   349  
   350  // setupMounts bind mounts all mounts specified in the spec in their correct
   351  // location inside root. It will resolve relative paths and symlinks. It also
   352  // creates directories as needed.
   353  func setupMounts(conf *config.Config, mounts []specs.Mount, root, procPath string) error {
   354  	for _, m := range mounts {
   355  		if !specutils.Is9PMount(m, conf.VFS2) {
   356  			continue
   357  		}
   358  
   359  		dst, err := resolveSymlinks(root, m.Destination)
   360  		if err != nil {
   361  			return fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err)
   362  		}
   363  
   364  		flags := specutils.OptionsToFlags(m.Options) | unix.MS_BIND
   365  		if conf.Overlay {
   366  			// Force mount read-only if writes are not going to be sent to it.
   367  			flags |= unix.MS_RDONLY
   368  		}
   369  
   370  		log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags)
   371  		if err := specutils.Mount(m.Source, dst, m.Type, flags, procPath); err != nil {
   372  			return fmt.Errorf("mounting %+v: %v", m, err)
   373  		}
   374  
   375  		// Set propagation options that cannot be set together with other options.
   376  		flags = specutils.PropOptionsToFlags(m.Options)
   377  		if flags != 0 {
   378  			if err := specutils.SafeMount("", dst, "", uintptr(flags), "", procPath); err != nil {
   379  				return fmt.Errorf("mount dst: %q, flags: %#x, err: %v", dst, flags, err)
   380  			}
   381  		}
   382  	}
   383  	return nil
   384  }
   385  
   386  // resolveMounts resolved relative paths and symlinks to mount points.
   387  //
   388  // Note: mount points must already be in place for resolution to work.
   389  // Otherwise, it may follow symlinks to locations that would be overwritten
   390  // with another mount point and return the wrong location. In short, make sure
   391  // setupMounts() has been called before.
   392  func resolveMounts(conf *config.Config, mounts []specs.Mount, root string) ([]specs.Mount, error) {
   393  	cleanMounts := make([]specs.Mount, 0, len(mounts))
   394  	for _, m := range mounts {
   395  		if !specutils.Is9PMount(m, conf.VFS2) {
   396  			cleanMounts = append(cleanMounts, m)
   397  			continue
   398  		}
   399  		dst, err := resolveSymlinks(root, m.Destination)
   400  		if err != nil {
   401  			return nil, fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err)
   402  		}
   403  		relDst, err := filepath.Rel(root, dst)
   404  		if err != nil {
   405  			panic(fmt.Sprintf("%q could not be made relative to %q: %v", dst, root, err))
   406  		}
   407  
   408  		opts, err := adjustMountOptions(conf, filepath.Join(root, relDst), m.Options)
   409  		if err != nil {
   410  			return nil, err
   411  		}
   412  
   413  		cpy := m
   414  		cpy.Destination = filepath.Join("/", relDst)
   415  		cpy.Options = opts
   416  		cleanMounts = append(cleanMounts, cpy)
   417  	}
   418  	return cleanMounts, nil
   419  }
   420  
   421  // ResolveSymlinks walks 'rel' having 'root' as the root directory. If there are
   422  // symlinks, they are evaluated relative to 'root' to ensure the end result is
   423  // the same as if the process was running inside the container.
   424  func resolveSymlinks(root, rel string) (string, error) {
   425  	return resolveSymlinksImpl(root, root, rel, 255)
   426  }
   427  
   428  func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) {
   429  	if followCount == 0 {
   430  		return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel))
   431  	}
   432  
   433  	rel = filepath.Clean(rel)
   434  	for _, name := range strings.Split(rel, string(filepath.Separator)) {
   435  		if name == "" {
   436  			continue
   437  		}
   438  		// Note that Join() resolves things like ".." and returns a clean path.
   439  		path := filepath.Join(base, name)
   440  		if !strings.HasPrefix(path, root) {
   441  			// One cannot '..' their way out of root.
   442  			base = root
   443  			continue
   444  		}
   445  		fi, err := os.Lstat(path)
   446  		if err != nil {
   447  			if !os.IsNotExist(err) {
   448  				return "", err
   449  			}
   450  			// Not found means there is no symlink to check. Just keep walking dirs.
   451  			base = path
   452  			continue
   453  		}
   454  		if fi.Mode()&os.ModeSymlink != 0 {
   455  			link, err := os.Readlink(path)
   456  			if err != nil {
   457  				return "", err
   458  			}
   459  			if filepath.IsAbs(link) {
   460  				base = root
   461  			}
   462  			base, err = resolveSymlinksImpl(root, base, link, followCount-1)
   463  			if err != nil {
   464  				return "", err
   465  			}
   466  			continue
   467  		}
   468  		base = path
   469  	}
   470  	return base, nil
   471  }
   472  
   473  // adjustMountOptions adds 'overlayfs_stale_read' if mounting over overlayfs.
   474  func adjustMountOptions(conf *config.Config, path string, opts []string) ([]string, error) {
   475  	rv := make([]string, len(opts))
   476  	copy(rv, opts)
   477  
   478  	statfs := unix.Statfs_t{}
   479  	if err := unix.Statfs(path, &statfs); err != nil {
   480  		return nil, err
   481  	}
   482  	if statfs.Type == unix.OVERLAYFS_SUPER_MAGIC {
   483  		rv = append(rv, "overlayfs_stale_read")
   484  	}
   485  	return rv, nil
   486  }