github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/runsc/cmd/gofer.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package cmd 16 17 import ( 18 "context" 19 "encoding/json" 20 "fmt" 21 "os" 22 "path/filepath" 23 "strings" 24 25 "github.com/google/subcommands" 26 specs "github.com/opencontainers/runtime-spec/specs-go" 27 "golang.org/x/sys/unix" 28 "github.com/SagerNet/gvisor/pkg/log" 29 "github.com/SagerNet/gvisor/pkg/p9" 30 "github.com/SagerNet/gvisor/pkg/sync" 31 "github.com/SagerNet/gvisor/pkg/unet" 32 "github.com/SagerNet/gvisor/runsc/config" 33 "github.com/SagerNet/gvisor/runsc/flag" 34 "github.com/SagerNet/gvisor/runsc/fsgofer" 35 "github.com/SagerNet/gvisor/runsc/fsgofer/filter" 36 "github.com/SagerNet/gvisor/runsc/specutils" 37 ) 38 39 var caps = []string{ 40 "CAP_CHOWN", 41 "CAP_DAC_OVERRIDE", 42 "CAP_DAC_READ_SEARCH", 43 "CAP_FOWNER", 44 "CAP_FSETID", 45 "CAP_SYS_CHROOT", 46 } 47 48 // goferCaps is the minimal set of capabilities needed by the Gofer to operate 49 // on files. 50 var goferCaps = &specs.LinuxCapabilities{ 51 Bounding: caps, 52 Effective: caps, 53 Permitted: caps, 54 } 55 56 // Gofer implements subcommands.Command for the "gofer" command, which starts a 57 // filesystem gofer. This command should not be called directly. 58 type Gofer struct { 59 bundleDir string 60 ioFDs intFlags 61 applyCaps bool 62 setUpRoot bool 63 64 specFD int 65 mountsFD int 66 } 67 68 // Name implements subcommands.Command. 69 func (*Gofer) Name() string { 70 return "gofer" 71 } 72 73 // Synopsis implements subcommands.Command. 74 func (*Gofer) Synopsis() string { 75 return "launch a gofer process that serves files over 9P protocol (internal use only)" 76 } 77 78 // Usage implements subcommands.Command. 79 func (*Gofer) Usage() string { 80 return `gofer [flags]` 81 } 82 83 // SetFlags implements subcommands.Command. 84 func (g *Gofer) SetFlags(f *flag.FlagSet) { 85 f.StringVar(&g.bundleDir, "bundle", "", "path to the root of the bundle directory, defaults to the current directory") 86 f.Var(&g.ioFDs, "io-fds", "list of FDs to connect 9P servers. They must follow this order: root first, then mounts as defined in the spec") 87 f.BoolVar(&g.applyCaps, "apply-caps", true, "if true, apply capabilities to restrict what the Gofer process can do") 88 f.BoolVar(&g.setUpRoot, "setup-root", true, "if true, set up an empty root for the process") 89 f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec") 90 f.IntVar(&g.mountsFD, "mounts-fd", -1, "mountsFD is the file descriptor to write list of mounts after they have been resolved (direct paths, no symlinks).") 91 } 92 93 // Execute implements subcommands.Command. 94 func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { 95 if g.bundleDir == "" || len(g.ioFDs) < 1 || g.specFD < 0 { 96 f.Usage() 97 return subcommands.ExitUsageError 98 } 99 100 conf := args[0].(*config.Config) 101 102 specFile := os.NewFile(uintptr(g.specFD), "spec file") 103 defer specFile.Close() 104 spec, err := specutils.ReadSpecFromFile(g.bundleDir, specFile, conf) 105 if err != nil { 106 Fatalf("reading spec: %v", err) 107 } 108 109 if g.setUpRoot { 110 if err := setupRootFS(spec, conf); err != nil { 111 Fatalf("Error setting up root FS: %v", err) 112 } 113 } 114 if g.applyCaps { 115 // Disable caps when calling myself again. 116 // Note: minimal argument handling for the default case to keep it simple. 117 args := os.Args 118 args = append(args, "--apply-caps=false", "--setup-root=false") 119 Fatalf("setCapsAndCallSelf(%v, %v): %v", args, goferCaps, setCapsAndCallSelf(args, goferCaps)) 120 panic("unreachable") 121 } 122 123 // Find what path is going to be served by this gofer. 124 root := spec.Root.Path 125 if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { 126 root = "/root" 127 } 128 129 // Resolve mount points paths, then replace mounts from our spec and send the 130 // mount list over to the sandbox, so they are both in sync. 131 // 132 // Note that all mount points have been mounted in the proper location in 133 // setupRootFS(). 134 cleanMounts, err := resolveMounts(conf, spec.Mounts, root) 135 if err != nil { 136 Fatalf("Failure to resolve mounts: %v", err) 137 } 138 spec.Mounts = cleanMounts 139 go func() { 140 if err := g.writeMounts(cleanMounts); err != nil { 141 panic(fmt.Sprintf("Failed to write mounts: %v", err)) 142 } 143 }() 144 145 specutils.LogSpec(spec) 146 147 // fsgofer should run with a umask of 0, because we want to preserve file 148 // modes exactly as sent by the sandbox, which will have applied its own umask. 149 unix.Umask(0) 150 151 if err := fsgofer.OpenProcSelfFD(); err != nil { 152 Fatalf("failed to open /proc/self/fd: %v", err) 153 } 154 155 if err := unix.Chroot(root); err != nil { 156 Fatalf("failed to chroot to %q: %v", root, err) 157 } 158 if err := unix.Chdir("/"); err != nil { 159 Fatalf("changing working dir: %v", err) 160 } 161 log.Infof("Process chroot'd to %q", root) 162 163 // Start with root mount, then add any other additional mount as needed. 164 ats := make([]p9.Attacher, 0, len(spec.Mounts)+1) 165 ap, err := fsgofer.NewAttachPoint("/", fsgofer.Config{ 166 ROMount: spec.Root.Readonly || conf.Overlay, 167 EnableVerityXattr: conf.Verity, 168 }) 169 if err != nil { 170 Fatalf("creating attach point: %v", err) 171 } 172 ats = append(ats, ap) 173 log.Infof("Serving %q mapped to %q on FD %d (ro: %t)", "/", root, g.ioFDs[0], spec.Root.Readonly) 174 175 mountIdx := 1 // first one is the root 176 for _, m := range spec.Mounts { 177 if specutils.Is9PMount(m, conf.VFS2) { 178 cfg := fsgofer.Config{ 179 ROMount: isReadonlyMount(m.Options) || conf.Overlay, 180 HostUDS: conf.FSGoferHostUDS, 181 EnableVerityXattr: conf.Verity, 182 } 183 ap, err := fsgofer.NewAttachPoint(m.Destination, cfg) 184 if err != nil { 185 Fatalf("creating attach point: %v", err) 186 } 187 ats = append(ats, ap) 188 189 if mountIdx >= len(g.ioFDs) { 190 Fatalf("no FD found for mount. Did you forget --io-fd? mount: %d, %v", len(g.ioFDs), m) 191 } 192 log.Infof("Serving %q mapped on FD %d (ro: %t)", m.Destination, g.ioFDs[mountIdx], cfg.ROMount) 193 mountIdx++ 194 } 195 } 196 if mountIdx != len(g.ioFDs) { 197 Fatalf("too many FDs passed for mounts. mounts: %d, FDs: %d", mountIdx, len(g.ioFDs)) 198 } 199 200 if conf.FSGoferHostUDS { 201 filter.InstallUDSFilters() 202 } 203 204 if conf.Verity { 205 filter.InstallXattrFilters() 206 } 207 208 if err := filter.Install(); err != nil { 209 Fatalf("installing seccomp filters: %v", err) 210 } 211 212 runServers(ats, g.ioFDs) 213 return subcommands.ExitSuccess 214 } 215 216 func runServers(ats []p9.Attacher, ioFDs []int) { 217 // Run the loops and wait for all to exit. 218 var wg sync.WaitGroup 219 for i, ioFD := range ioFDs { 220 wg.Add(1) 221 go func(ioFD int, at p9.Attacher) { 222 socket, err := unet.NewSocket(ioFD) 223 if err != nil { 224 Fatalf("creating server on FD %d: %v", ioFD, err) 225 } 226 s := p9.NewServer(at) 227 if err := s.Handle(socket); err != nil { 228 Fatalf("P9 server returned error. Gofer is shutting down. FD: %d, err: %v", ioFD, err) 229 } 230 wg.Done() 231 }(ioFD, ats[i]) 232 } 233 wg.Wait() 234 log.Infof("All 9P servers exited.") 235 } 236 237 func (g *Gofer) writeMounts(mounts []specs.Mount) error { 238 bytes, err := json.Marshal(mounts) 239 if err != nil { 240 return err 241 } 242 243 f := os.NewFile(uintptr(g.mountsFD), "mounts file") 244 defer f.Close() 245 246 for written := 0; written < len(bytes); { 247 w, err := f.Write(bytes[written:]) 248 if err != nil { 249 return err 250 } 251 written += w 252 } 253 return nil 254 } 255 256 func isReadonlyMount(opts []string) bool { 257 for _, o := range opts { 258 if o == "ro" { 259 return true 260 } 261 } 262 return false 263 } 264 265 func setupRootFS(spec *specs.Spec, conf *config.Config) error { 266 // Convert all shared mounts into slaves to be sure that nothing will be 267 // propagated outside of our namespace. 268 procPath := "/proc" 269 if err := specutils.SafeMount("", "/", "", unix.MS_SLAVE|unix.MS_REC, "", procPath); err != nil { 270 Fatalf("error converting mounts: %v", err) 271 } 272 273 root := spec.Root.Path 274 if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { 275 // runsc can't be re-executed without /proc, so we create a tmpfs mount, 276 // mount ./proc and ./root there, then move this mount to the root and after 277 // setCapsAndCallSelf, runsc will chroot into /root. 278 // 279 // We need a directory to construct a new root and we know that 280 // runsc can't start without /proc, so we can use it for this. 281 flags := uintptr(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC) 282 if err := specutils.SafeMount("runsc-root", "/proc", "tmpfs", flags, "", procPath); err != nil { 283 Fatalf("error mounting tmpfs: %v", err) 284 } 285 286 // Prepare tree structure for pivot_root(2). 287 os.Mkdir("/proc/proc", 0755) 288 os.Mkdir("/proc/root", 0755) 289 // This cannot use SafeMount because there's no available procfs. But we 290 // know that /proc is an empty tmpfs mount, so this is safe. 291 if err := unix.Mount("runsc-proc", "/proc/proc", "proc", flags|unix.MS_RDONLY, ""); err != nil { 292 Fatalf("error mounting proc: %v", err) 293 } 294 root = "/proc/root" 295 procPath = "/proc/proc" 296 } 297 298 // Mount root path followed by submounts. 299 if err := specutils.SafeMount(spec.Root.Path, root, "bind", unix.MS_BIND|unix.MS_REC, "", procPath); err != nil { 300 return fmt.Errorf("mounting root on root (%q) err: %v", root, err) 301 } 302 303 flags := uint32(unix.MS_SLAVE | unix.MS_REC) 304 if spec.Linux != nil && spec.Linux.RootfsPropagation != "" { 305 flags = specutils.PropOptionsToFlags([]string{spec.Linux.RootfsPropagation}) 306 } 307 if err := specutils.SafeMount("", root, "", uintptr(flags), "", procPath); err != nil { 308 return fmt.Errorf("mounting root (%q) with flags: %#x, err: %v", root, flags, err) 309 } 310 311 // Replace the current spec, with the clean spec with symlinks resolved. 312 if err := setupMounts(conf, spec.Mounts, root, procPath); err != nil { 313 Fatalf("error setting up FS: %v", err) 314 } 315 316 // Create working directory if needed. 317 if spec.Process.Cwd != "" { 318 dst, err := resolveSymlinks(root, spec.Process.Cwd) 319 if err != nil { 320 return fmt.Errorf("resolving symlinks to %q: %v", spec.Process.Cwd, err) 321 } 322 log.Infof("Create working directory %q if needed", spec.Process.Cwd) 323 if err := os.MkdirAll(dst, 0755); err != nil { 324 return fmt.Errorf("creating working directory %q: %v", spec.Process.Cwd, err) 325 } 326 } 327 328 // Check if root needs to be remounted as readonly. 329 if spec.Root.Readonly || conf.Overlay { 330 // If root is a mount point but not read-only, we can change mount options 331 // to make it read-only for extra safety. 332 log.Infof("Remounting root as readonly: %q", root) 333 flags := uintptr(unix.MS_BIND | unix.MS_REMOUNT | unix.MS_RDONLY | unix.MS_REC) 334 if err := specutils.SafeMount(root, root, "bind", flags, "", procPath); err != nil { 335 return fmt.Errorf("remounting root as read-only with source: %q, target: %q, flags: %#x, err: %v", root, root, flags, err) 336 } 337 } 338 339 if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot { 340 if err := pivotRoot("/proc"); err != nil { 341 Fatalf("failed to change the root file system: %v", err) 342 } 343 if err := os.Chdir("/"); err != nil { 344 Fatalf("failed to change working directory") 345 } 346 } 347 return nil 348 } 349 350 // setupMounts bind mounts all mounts specified in the spec in their correct 351 // location inside root. It will resolve relative paths and symlinks. It also 352 // creates directories as needed. 353 func setupMounts(conf *config.Config, mounts []specs.Mount, root, procPath string) error { 354 for _, m := range mounts { 355 if !specutils.Is9PMount(m, conf.VFS2) { 356 continue 357 } 358 359 dst, err := resolveSymlinks(root, m.Destination) 360 if err != nil { 361 return fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err) 362 } 363 364 flags := specutils.OptionsToFlags(m.Options) | unix.MS_BIND 365 if conf.Overlay { 366 // Force mount read-only if writes are not going to be sent to it. 367 flags |= unix.MS_RDONLY 368 } 369 370 log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags) 371 if err := specutils.Mount(m.Source, dst, m.Type, flags, procPath); err != nil { 372 return fmt.Errorf("mounting %+v: %v", m, err) 373 } 374 375 // Set propagation options that cannot be set together with other options. 376 flags = specutils.PropOptionsToFlags(m.Options) 377 if flags != 0 { 378 if err := specutils.SafeMount("", dst, "", uintptr(flags), "", procPath); err != nil { 379 return fmt.Errorf("mount dst: %q, flags: %#x, err: %v", dst, flags, err) 380 } 381 } 382 } 383 return nil 384 } 385 386 // resolveMounts resolved relative paths and symlinks to mount points. 387 // 388 // Note: mount points must already be in place for resolution to work. 389 // Otherwise, it may follow symlinks to locations that would be overwritten 390 // with another mount point and return the wrong location. In short, make sure 391 // setupMounts() has been called before. 392 func resolveMounts(conf *config.Config, mounts []specs.Mount, root string) ([]specs.Mount, error) { 393 cleanMounts := make([]specs.Mount, 0, len(mounts)) 394 for _, m := range mounts { 395 if !specutils.Is9PMount(m, conf.VFS2) { 396 cleanMounts = append(cleanMounts, m) 397 continue 398 } 399 dst, err := resolveSymlinks(root, m.Destination) 400 if err != nil { 401 return nil, fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err) 402 } 403 relDst, err := filepath.Rel(root, dst) 404 if err != nil { 405 panic(fmt.Sprintf("%q could not be made relative to %q: %v", dst, root, err)) 406 } 407 408 opts, err := adjustMountOptions(conf, filepath.Join(root, relDst), m.Options) 409 if err != nil { 410 return nil, err 411 } 412 413 cpy := m 414 cpy.Destination = filepath.Join("/", relDst) 415 cpy.Options = opts 416 cleanMounts = append(cleanMounts, cpy) 417 } 418 return cleanMounts, nil 419 } 420 421 // ResolveSymlinks walks 'rel' having 'root' as the root directory. If there are 422 // symlinks, they are evaluated relative to 'root' to ensure the end result is 423 // the same as if the process was running inside the container. 424 func resolveSymlinks(root, rel string) (string, error) { 425 return resolveSymlinksImpl(root, root, rel, 255) 426 } 427 428 func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) { 429 if followCount == 0 { 430 return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel)) 431 } 432 433 rel = filepath.Clean(rel) 434 for _, name := range strings.Split(rel, string(filepath.Separator)) { 435 if name == "" { 436 continue 437 } 438 // Note that Join() resolves things like ".." and returns a clean path. 439 path := filepath.Join(base, name) 440 if !strings.HasPrefix(path, root) { 441 // One cannot '..' their way out of root. 442 base = root 443 continue 444 } 445 fi, err := os.Lstat(path) 446 if err != nil { 447 if !os.IsNotExist(err) { 448 return "", err 449 } 450 // Not found means there is no symlink to check. Just keep walking dirs. 451 base = path 452 continue 453 } 454 if fi.Mode()&os.ModeSymlink != 0 { 455 link, err := os.Readlink(path) 456 if err != nil { 457 return "", err 458 } 459 if filepath.IsAbs(link) { 460 base = root 461 } 462 base, err = resolveSymlinksImpl(root, base, link, followCount-1) 463 if err != nil { 464 return "", err 465 } 466 continue 467 } 468 base = path 469 } 470 return base, nil 471 } 472 473 // adjustMountOptions adds 'overlayfs_stale_read' if mounting over overlayfs. 474 func adjustMountOptions(conf *config.Config, path string, opts []string) ([]string, error) { 475 rv := make([]string, len(opts)) 476 copy(rv, opts) 477 478 statfs := unix.Statfs_t{} 479 if err := unix.Statfs(path, &statfs); err != nil { 480 return nil, err 481 } 482 if statfs.Type == unix.OVERLAYFS_SUPER_MAGIC { 483 rv = append(rv, "overlayfs_stale_read") 484 } 485 return rv, nil 486 }