github.com/ttpreport/gvisor-ligolo@v0.0.0-20240123134145-a858404967ba/runsc/cmd/gofer.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"runtime"
	"runtime/debug"
	"strings"

	"github.com/google/subcommands"
	specs "github.com/opencontainers/runtime-spec/specs-go"
	"github.com/ttpreport/gvisor-ligolo/pkg/log"
	"github.com/ttpreport/gvisor-ligolo/pkg/unet"
	"github.com/ttpreport/gvisor-ligolo/runsc/boot"
	"github.com/ttpreport/gvisor-ligolo/runsc/cmd/util"
	"github.com/ttpreport/gvisor-ligolo/runsc/config"
	"github.com/ttpreport/gvisor-ligolo/runsc/flag"
	"github.com/ttpreport/gvisor-ligolo/runsc/fsgofer"
	"github.com/ttpreport/gvisor-ligolo/runsc/fsgofer/filter"
	"github.com/ttpreport/gvisor-ligolo/runsc/profile"
	"github.com/ttpreport/gvisor-ligolo/runsc/specutils"
	"golang.org/x/sys/unix"
)

var caps = []string{
	"CAP_CHOWN",
	"CAP_DAC_OVERRIDE",
	"CAP_DAC_READ_SEARCH",
	"CAP_FOWNER",
	"CAP_FSETID",
	"CAP_SYS_CHROOT",
}

// goferCaps is the minimal set of capabilities needed by the Gofer to operate
// on files.
var goferCaps = &specs.LinuxCapabilities{
	Bounding:  caps,
	Effective: caps,
	Permitted: caps,
}

// goferSyncFDs contains file descriptors that are used for synchronization
// of the Gofer startup process against other processes.
type goferSyncFDs struct {
	// nvproxyFD is a file descriptor that is used to wait until
	// nvproxy-related setup is done. This setup involves creating mounts in the
	// Gofer process's mount namespace.
	// If this is set, this FD is the first that the Gofer waits for.
	nvproxyFD int
	// usernsFD is a file descriptor that is used to wait until
	// user namespace ID mappings are established in the Gofer's userns.
	// If this is set, this FD is the second that the Gofer waits for.
	usernsFD int
	// procMountFD is a file descriptor that has to be closed when the
	// procfs mount isn't needed anymore. It is read by the procfs unmounter
	// process.
	// If this is set, this FD is the last that the Gofer interacts with and
	// closes.
	procMountFD int
}

// Gofer implements subcommands.Command for the "gofer" command, which starts a
// filesystem gofer. This command should not be called directly.
type Gofer struct {
	bundleDir      string
	ioFDs          intFlags
	applyCaps      bool
	setUpRoot      bool
	overlayMediums boot.OverlayMediumFlags

	specFD        int
	mountsFD      int
	profileFDs    profile.FDArgs
	syncFDs       goferSyncFDs
	stopProfiling func()
}

// Name implements subcommands.Command.
func (*Gofer) Name() string {
	return "gofer"
}

// Synopsis implements subcommands.Command.
func (g *Gofer) Synopsis() string {
	return fmt.Sprintf("launch a gofer process that proxies access to container files")
}

// Usage implements subcommands.Command.
func (*Gofer) Usage() string {
	return `gofer [flags]`
}

// SetFlags implements subcommands.Command.
func (g *Gofer) SetFlags(f *flag.FlagSet) {
	f.StringVar(&g.bundleDir, "bundle", "", "path to the root of the bundle directory, defaults to the current directory")
	f.BoolVar(&g.applyCaps, "apply-caps", true, "if true, apply capabilities to restrict what the Gofer process can do")
	f.BoolVar(&g.setUpRoot, "setup-root", true, "if true, set up an empty root for the process")

	// Open FDs that are donated to the gofer.
	f.Var(&g.ioFDs, "io-fds", "list of FDs to connect gofer servers. They must follow this order: root first, then mounts as defined in the spec")
	f.Var(&g.overlayMediums, "overlay-mediums", "information about how the gofer mounts have been overlaid.")
	f.IntVar(&g.specFD, "spec-fd", -1, "required fd with the container spec")
	f.IntVar(&g.mountsFD, "mounts-fd", -1, "file descriptor to write the list of mounts to after they have been resolved (direct paths, no symlinks)")

	// Add synchronization FD flags.
	g.syncFDs.setFlags(f)

	// Profiling flags.
	g.profileFDs.SetFromFlags(f)
}

// Execute implements subcommands.Command.
func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...any) subcommands.ExitStatus {
	if g.bundleDir == "" || len(g.ioFDs) < 1 || g.specFD < 0 {
		f.Usage()
		return subcommands.ExitUsageError
	}

	conf := args[0].(*config.Config)

	// Set traceback level.
	debug.SetTraceback(conf.Traceback)

	specFile := os.NewFile(uintptr(g.specFD), "spec file")
	defer specFile.Close()
	spec, err := specutils.ReadSpecFromFile(g.bundleDir, specFile, conf)
	if err != nil {
		util.Fatalf("reading spec: %v", err)
	}

	g.syncFDs.syncNVProxy()
	g.syncFDs.syncUsernsForRootless()

	if g.setUpRoot {
		if err := g.setupRootFS(spec, conf); err != nil {
			util.Fatalf("Error setting up root FS: %v", err)
		}
		if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
			cleanupUnmounter := g.syncFDs.spawnProcUnmounter()
			defer cleanupUnmounter()
		}
	}
	if g.applyCaps {
		// Disable caps when calling myself again.
		// Note: minimal argument handling for the default case to keep it simple.
		args := os.Args
		args = append(
			args,
			"--apply-caps=false",
			"--setup-root=false",
		)
		args = append(args, g.syncFDs.flags()...)
		util.Fatalf("setCapsAndCallSelf(%v, %v): %v", args, goferCaps, setCapsAndCallSelf(args, goferCaps))
		panic("unreachable")
	}

	// Start profiling. This will be a no-op if no profiling arguments were passed.
	profileOpts := g.profileFDs.ToOpts()
	g.stopProfiling = profile.Start(profileOpts)

	// At this point we won't re-execute, so it's safe to limit via rlimits. Any
	// limit >= 0 works. If the limit is lower than the current number of open
	// files, then Setrlimit will succeed, and the next open will fail.
	if conf.FDLimit > -1 {
		rlimit := unix.Rlimit{
			Cur: uint64(conf.FDLimit),
			Max: uint64(conf.FDLimit),
		}
		switch err := unix.Setrlimit(unix.RLIMIT_NOFILE, &rlimit); err {
		case nil:
		case unix.EPERM:
			log.Warningf("FD limit %d is higher than the current hard limit or system-wide maximum", conf.FDLimit)
		default:
			util.Fatalf("Failed to set RLIMIT_NOFILE: %v", err)
		}
	}

	// Find what path is going to be served by this gofer.
	root := spec.Root.Path
	if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
		root = "/root"
	}

	// Resolve mount point paths, then replace mounts from our spec and send the
	// mount list over to the sandbox, so they are both in sync.
	//
	// Note that all mount points have been mounted in the proper location in
	// setupRootFS().
	cleanMounts, err := resolveMounts(conf, spec.Mounts, root)
	if err != nil {
		util.Fatalf("Failure to resolve mounts: %v", err)
	}
	spec.Mounts = cleanMounts
	go func() {
		if err := g.writeMounts(cleanMounts); err != nil {
			panic(fmt.Sprintf("Failed to write mounts: %v", err))
		}
	}()

	specutils.LogSpecDebug(spec, conf.OCISeccomp)

	// fsgofer should run with a umask of 0, because we want to preserve file
	// modes exactly as sent by the sandbox, which will have applied its own umask.
	unix.Umask(0)

	if err := fsgofer.OpenProcSelfFD(); err != nil {
		util.Fatalf("failed to open /proc/self/fd: %v", err)
	}

	// procfs isn't needed anymore.
	g.syncFDs.unmountProcfs()

	if err := unix.Chroot(root); err != nil {
		util.Fatalf("failed to chroot to %q: %v", root, err)
	}
	if err := unix.Chdir("/"); err != nil {
		util.Fatalf("changing working dir: %v", err)
	}
	log.Infof("Process chroot'd to %q", root)

	// Initialize filters.
	opts := filter.Options{
		UDSOpenEnabled:   conf.GetHostUDS().AllowOpen(),
		UDSCreateEnabled: conf.GetHostUDS().AllowCreate(),
		ProfileEnabled:   len(profileOpts) > 0,
	}
	if err := filter.Install(opts); err != nil {
		util.Fatalf("installing seccomp filters: %v", err)
	}

	return g.serve(spec, conf, root)
}

func newSocket(ioFD int) *unet.Socket {
	socket, err := unet.NewSocket(ioFD)
	if err != nil {
		util.Fatalf("creating server on FD %d: %v", ioFD, err)
	}
	return socket
}

func (g *Gofer) serve(spec *specs.Spec, conf *config.Config, root string) subcommands.ExitStatus {
	type connectionConfig struct {
		sock      *unet.Socket
		mountPath string
		readonly  bool
	}
	cfgs := make([]connectionConfig, 0, len(spec.Mounts)+1)
	server := fsgofer.NewLisafsServer(fsgofer.Config{
		// These are global options. Ignore readonly configuration, that is set on
		// a per connection basis.
		HostUDS:            conf.GetHostUDS(),
		HostFifo:           conf.HostFifo,
		DonateMountPointFD: conf.DirectFS,
	})

	// Start with root mount, then add any other additional mount as needed.
	cfgs = append(cfgs, connectionConfig{
		sock:      newSocket(g.ioFDs[0]),
		mountPath: "/", // fsgofer process is always chroot()ed. So serve root.
		readonly:  spec.Root.Readonly || g.overlayMediums[0].IsEnabled(),
	})
	log.Infof("Serving %q mapped to %q on FD %d (ro: %t)", "/", root, g.ioFDs[0], cfgs[0].readonly)

	mountIdx := 1 // first one is the root
	for _, m := range spec.Mounts {
		if !specutils.IsGoferMount(m) {
			continue
		}

		if !filepath.IsAbs(m.Destination) {
			util.Fatalf("mount destination must be absolute: %q", m.Destination)
		}
		if mountIdx >= len(g.ioFDs) {
			util.Fatalf("no FD found for mount. Did you forget --io-fds? FDs: %d, Mount: %+v", len(g.ioFDs), m)
		}

		cfgs = append(cfgs, connectionConfig{
			sock:      newSocket(g.ioFDs[mountIdx]),
			mountPath: m.Destination,
			readonly:  specutils.IsReadonlyMount(m.Options) || g.overlayMediums[mountIdx].IsEnabled(),
		})

		log.Infof("Serving %q mapped on FD %d (ro: %t)", m.Destination, g.ioFDs[mountIdx], cfgs[mountIdx].readonly)
		mountIdx++
	}

	if mountIdx != len(g.ioFDs) {
		util.Fatalf("too many FDs passed for mounts. mounts: %d, FDs: %d", mountIdx, len(g.ioFDs))
	}
	cfgs = cfgs[:mountIdx]

	for _, cfg := range cfgs {
		conn, err := server.CreateConnection(cfg.sock, cfg.mountPath, cfg.readonly)
		if err != nil {
			util.Fatalf("starting connection on FD %d for gofer mount failed: %v", cfg.sock.FD(), err)
		}
		server.StartConnection(conn)
	}
	server.Wait()
	server.Destroy()
	log.Infof("All lisafs servers exited.")
	if g.stopProfiling != nil {
		g.stopProfiling()
	}
	return subcommands.ExitSuccess
}

func (g *Gofer) writeMounts(mounts []specs.Mount) error {
	bytes, err := json.Marshal(mounts)
	if err != nil {
		return err
	}

	f := os.NewFile(uintptr(g.mountsFD), "mounts file")
	defer f.Close()

	for written := 0; written < len(bytes); {
		w, err := f.Write(bytes[written:])
		if err != nil {
			return err
		}
		written += w
	}
	return nil
}

func (g *Gofer) setupRootFS(spec *specs.Spec, conf *config.Config) error {
	// Convert all shared mounts into slaves to be sure that nothing will be
	// propagated outside of our namespace.
	procPath := "/proc"
	if err := specutils.SafeMount("", "/", "", unix.MS_SLAVE|unix.MS_REC, "", procPath); err != nil {
		util.Fatalf("error converting mounts: %v", err)
	}

	root := spec.Root.Path
	if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
		// runsc can't be re-executed without /proc, so we create a tmpfs mount,
		// mount ./proc and ./root there, then move this mount to the root; after
		// setCapsAndCallSelf, runsc will chroot into /root.
		//
		// We need a directory to construct a new root and we know that
		// runsc can't start without /proc, so we can use it for this.
		flags := uintptr(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC)
		if err := specutils.SafeMount("runsc-root", "/proc", "tmpfs", flags, "", procPath); err != nil {
			util.Fatalf("error mounting tmpfs: %v", err)
		}

		// Prepare tree structure for pivot_root(2).
		if err := os.Mkdir("/proc/proc", 0755); err != nil {
			util.Fatalf("error creating /proc/proc: %v", err)
		}
		if err := os.Mkdir("/proc/root", 0755); err != nil {
			util.Fatalf("error creating /proc/root: %v", err)
		}
		if err := os.Mkdir("/proc/etc", 0755); err != nil {
			util.Fatalf("error creating /proc/etc: %v", err)
		}
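
		// For orientation, the temporary tree assembled under the tmpfs looks
		// roughly like this once the mounts below are in place:
		//
		//	/proc       tmpfs "runsc-root" (becomes the new "/" after pivot_root)
		//	/proc/proc  the real procfs "runsc-proc", mounted read-only below
		//	/proc/root  bind mount of spec.Root.Path, i.e. "/root" after pivot_root
		//	/proc/etc   holds a copy of /etc/localtime
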
		// This cannot use SafeMount because there's no available procfs. But we
		// know that /proc is an empty tmpfs mount, so this is safe.
		if err := unix.Mount("runsc-proc", "/proc/proc", "proc", flags|unix.MS_RDONLY, ""); err != nil {
			util.Fatalf("error mounting proc: %v", err)
		}
		// self/fd is bind-mounted, so that the FD returned by OpenProcSelfFD()
		// does not allow escapes by walking "..".
		if err := unix.Mount("/proc/proc/self/fd", "/proc/proc/self/fd",
			"", unix.MS_RDONLY|unix.MS_BIND|unix.MS_NOEXEC, ""); err != nil {
			util.Fatalf("error mounting proc/self/fd: %v", err)
		}
		if err := copyFile("/proc/etc/localtime", "/etc/localtime"); err != nil {
			log.Warningf("Failed to copy /etc/localtime: %v. UTC timezone will be used.", err)
		}
		root = "/proc/root"
		procPath = "/proc/proc"
	}

	// Mount root path followed by submounts.
	if err := specutils.SafeMount(spec.Root.Path, root, "bind", unix.MS_BIND|unix.MS_REC, "", procPath); err != nil {
		return fmt.Errorf("mounting root on root (%q) err: %v", root, err)
	}

	flags := uint32(unix.MS_SLAVE | unix.MS_REC)
	if spec.Linux != nil && spec.Linux.RootfsPropagation != "" {
		flags = specutils.PropOptionsToFlags([]string{spec.Linux.RootfsPropagation})
	}
	if err := specutils.SafeMount("", root, "", uintptr(flags), "", procPath); err != nil {
		return fmt.Errorf("mounting root (%q) with flags: %#x, err: %v", root, flags, err)
	}

	// Set up mounts from the spec, with relative paths and symlinks resolved.
	if err := g.setupMounts(conf, spec.Mounts, root, procPath); err != nil {
		util.Fatalf("error setting up FS: %v", err)
	}

	// Create working directory if needed.
	if spec.Process.Cwd != "" {
		dst, err := resolveSymlinks(root, spec.Process.Cwd)
		if err != nil {
			return fmt.Errorf("resolving symlinks to %q: %v", spec.Process.Cwd, err)
		}
		log.Infof("Creating working directory %q if needed", spec.Process.Cwd)
		if err := os.MkdirAll(dst, 0755); err != nil {
			return fmt.Errorf("creating working directory %q: %v", spec.Process.Cwd, err)
		}
	}

	// Check if root needs to be remounted as readonly.
	if spec.Root.Readonly || g.overlayMediums[0].IsEnabled() {
		// If root is a mount point but not read-only, we can change mount options
		// to make it read-only for extra safety.
		log.Infof("Remounting root as readonly: %q", root)
		flags := uintptr(unix.MS_BIND | unix.MS_REMOUNT | unix.MS_RDONLY | unix.MS_REC)
		if err := specutils.SafeMount(root, root, "bind", flags, "", procPath); err != nil {
			return fmt.Errorf("remounting root as read-only with source: %q, target: %q, flags: %#x, err: %v", root, root, flags, err)
		}
	}

	if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
		if err := pivotRoot("/proc"); err != nil {
			util.Fatalf("failed to change the root file system: %v", err)
		}
		if err := os.Chdir("/"); err != nil {
			util.Fatalf("failed to change working directory: %v", err)
		}
	}
	return nil
}

// setupMounts bind mounts all mounts specified in the spec in their correct
// location inside root. It will resolve relative paths and symlinks. It also
// creates directories as needed.
func (g *Gofer) setupMounts(conf *config.Config, mounts []specs.Mount, root, procPath string) error {
	goferMntIdx := 1 // First index is for rootfs.
	for _, m := range mounts {
		if !specutils.IsGoferMount(m) {
			continue
		}

		dst, err := resolveSymlinks(root, m.Destination)
		if err != nil {
			return fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err)
		}

		flags := specutils.OptionsToFlags(m.Options) | unix.MS_BIND
		if g.overlayMediums[goferMntIdx].IsEnabled() {
			// Force mount read-only if writes are not going to be sent to it.
			flags |= unix.MS_RDONLY
		}

		log.Infof("Mounting src: %q, dst: %q, flags: %#x", m.Source, dst, flags)
		if err := specutils.SafeSetupAndMount(m.Source, dst, m.Type, flags, procPath); err != nil {
			return fmt.Errorf("mounting %+v: %v", m, err)
		}

		// Set propagation options that cannot be set together with other options.
		flags = specutils.PropOptionsToFlags(m.Options)
		if flags != 0 {
			if err := specutils.SafeMount("", dst, "", uintptr(flags), "", procPath); err != nil {
				return fmt.Errorf("mount dst: %q, flags: %#x, err: %v", dst, flags, err)
			}
		}
		goferMntIdx++
	}
	return nil
}

// resolveMounts resolves relative paths and symlinks to mount points.
//
// Note: mount points must already be in place for resolution to work.
// Otherwise, it may follow symlinks to locations that would be overwritten
// with another mount point and return the wrong location. In short, make sure
// setupMounts() has been called before.
func resolveMounts(conf *config.Config, mounts []specs.Mount, root string) ([]specs.Mount, error) {
	cleanMounts := make([]specs.Mount, 0, len(mounts))
	for _, m := range mounts {
		if !specutils.IsGoferMount(m) {
			cleanMounts = append(cleanMounts, m)
			continue
		}
		dst, err := resolveSymlinks(root, m.Destination)
		if err != nil {
			return nil, fmt.Errorf("resolving symlinks to %q: %v", m.Destination, err)
		}
		relDst, err := filepath.Rel(root, dst)
		if err != nil {
			panic(fmt.Sprintf("%q could not be made relative to %q: %v", dst, root, err))
		}

		opts, err := adjustMountOptions(conf, filepath.Join(root, relDst), m.Options)
		if err != nil {
			return nil, err
		}

		cpy := m
		cpy.Destination = filepath.Join("/", relDst)
		cpy.Options = opts
		cleanMounts = append(cleanMounts, cpy)
	}
	return cleanMounts, nil
}

// resolveSymlinks walks 'rel' having 'root' as the root directory. If there are
// symlinks, they are evaluated relative to 'root' to ensure the end result is
// the same as if the process was running inside the container.
func resolveSymlinks(root, rel string) (string, error) {
	return resolveSymlinksImpl(root, root, rel, 255)
}
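
// resolveSymlinksImpl walks each component of 'rel', using 'base' as the
// current position. A ".." or an absolute symlink target that would escape
// 'root' re-roots the walk at 'root'; relative symlink targets are resolved
// from the symlink's parent directory. followCount bounds how many symlinks
// may be followed. For example, with root "/root" and rel "a/link", where
// "/root/a/link" is a symlink to "/b", the walk re-roots at "/root" and
// returns "/root/b".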
func resolveSymlinksImpl(root, base, rel string, followCount uint) (string, error) {
	if followCount == 0 {
		return "", fmt.Errorf("too many symlinks to follow, path: %q", filepath.Join(base, rel))
	}

	rel = filepath.Clean(rel)
	for _, name := range strings.Split(rel, string(filepath.Separator)) {
		if name == "" {
			continue
		}
		// Note that Join() resolves things like ".." and returns a clean path.
		path := filepath.Join(base, name)
		if !strings.HasPrefix(path, root) {
			// One cannot '..' their way out of root.
			base = root
			continue
		}
		fi, err := os.Lstat(path)
		if err != nil {
			if !os.IsNotExist(err) {
				return "", err
			}
			// Not found means there is no symlink to check. Just keep walking dirs.
			base = path
			continue
		}
		if fi.Mode()&os.ModeSymlink != 0 {
			link, err := os.Readlink(path)
			if err != nil {
				return "", err
			}
			if filepath.IsAbs(link) {
				base = root
			}
			base, err = resolveSymlinksImpl(root, base, link, followCount-1)
			if err != nil {
				return "", err
			}
			continue
		}
		base = path
	}
	return base, nil
}

// adjustMountOptions adds 'overlayfs_stale_read' if mounting over overlayfs.
func adjustMountOptions(conf *config.Config, path string, opts []string) ([]string, error) {
	rv := make([]string, len(opts))
	copy(rv, opts)

	statfs := unix.Statfs_t{}
	if err := unix.Statfs(path, &statfs); err != nil {
		return nil, err
	}
	if statfs.Type == unix.OVERLAYFS_SUPER_MAGIC {
		rv = append(rv, "overlayfs_stale_read")
	}
	return rv, nil
}

// setFlags sets sync FD flags on the given FlagSet.
func (g *goferSyncFDs) setFlags(f *flag.FlagSet) {
	f.IntVar(&g.nvproxyFD, "sync-nvproxy-fd", -1, "file descriptor that the gofer waits on until nvproxy setup is done")
	f.IntVar(&g.usernsFD, "sync-userns-fd", -1, "file descriptor that the gofer waits on until userns mappings are set up")
	f.IntVar(&g.procMountFD, "proc-mount-sync-fd", -1, "file descriptor that the gofer writes to when /proc isn't needed anymore and can be unmounted")
}

// flags returns the flags necessary to pass along the current sync FD values
// to a re-executed version of this process.
func (g *goferSyncFDs) flags() []string {
	return []string{
		fmt.Sprintf("--sync-nvproxy-fd=%d", g.nvproxyFD),
		fmt.Sprintf("--sync-userns-fd=%d", g.usernsFD),
		fmt.Sprintf("--proc-mount-sync-fd=%d", g.procMountFD),
	}
}

// waitForFD waits for the other end of a given FD to be closed.
// `fd` is closed unconditionally after that.
// This should only be called for actual FDs (i.e. `fd` >= 0).
func waitForFD(fd int, fdName string) error {
	log.Debugf("Waiting on %s %d...", fdName, fd)
	f := os.NewFile(uintptr(fd), fdName)
	defer f.Close()
	var b [1]byte
	if n, err := f.Read(b[:]); n != 0 || err != io.EOF {
		return fmt.Errorf("failed to sync on %s: %v: %v", fdName, n, err)
	}
	log.Debugf("Synced on %s %d.", fdName, fd)
	return nil
}
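
// Note: waitForFD treats a zero-byte read ending in io.EOF as the success
// signal, so the peer is expected to hold its copy of the FD open without
// writing to it, and to signal completion simply by closing it. The exact
// peer-side mechanism (e.g. runsc holding the write end of a pipe) is an
// assumption; only the gofer-side wait above is defined in this file.
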
// spawnProcUnmounter executes the /proc unmounter process.
// It returns a function to wait on the proc unmounter process, which
// should be called (via defer) in case of errors in order to clean up the
// unmounter process properly.
// When procfs is no longer needed, `unmountProcfs` should be called.
func (g *goferSyncFDs) spawnProcUnmounter() func() {
	if g.procMountFD != -1 {
		util.Fatalf("procMountFD is set")
	}
	// /proc is unmounted from a forked process, because the
	// current one may re-execute itself without capabilities.
	cmd, w := execProcUmounter()
	// Clear FD_CLOEXEC. This process may be re-executed. procMountFD
	// should remain open.
	if _, _, errno := unix.RawSyscall(unix.SYS_FCNTL, w.Fd(), unix.F_SETFD, 0); errno != 0 {
		util.Fatalf("error clearing CLOEXEC: %v", errno)
	}
	g.procMountFD = int(w.Fd())
	return func() {
		g.procMountFD = -1
		w.Close()
		cmd.Wait()
	}
}

// unmountProcfs signals the proc unmounter process that procfs is no longer
// needed.
func (g *goferSyncFDs) unmountProcfs() {
	if g.procMountFD < 0 {
		return
	}
	umountProc(g.procMountFD)
	g.procMountFD = -1
}

// syncUsernsForRootless waits on usernsFD to be closed and then sets
// UID/GID to 0. Note that this function calls runtime.LockOSThread().
// This function is a no-op if usernsFD is -1.
//
// Postcondition: All callers must re-exec themselves after this returns,
// unless usernsFD was -1.
func (g *goferSyncFDs) syncUsernsForRootless() {
	syncUsernsForRootless(g.usernsFD)
	g.usernsFD = -1
}

// syncUsernsForRootless waits on the given FD to be closed and then sets
// UID/GID to 0. Note that this function calls runtime.LockOSThread().
// This function is a no-op if fd is -1.
//
// Postcondition: All callers must re-exec themselves after this returns,
// unless fd is -1.
func syncUsernsForRootless(fd int) {
	if fd < 0 {
		return
	}
	if err := waitForFD(fd, "userns sync FD"); err != nil {
		util.Fatalf("failed to sync on userns FD: %v", err)
	}

	// SETUID changes the UID on the current system thread only, so we have
	// to re-execute the current binary.
	runtime.LockOSThread()
	if _, _, errno := unix.RawSyscall(unix.SYS_SETUID, 0, 0, 0); errno != 0 {
		util.Fatalf("failed to set UID: %v", errno)
	}
	if _, _, errno := unix.RawSyscall(unix.SYS_SETGID, 0, 0, 0); errno != 0 {
		util.Fatalf("failed to set GID: %v", errno)
	}
}

// syncNVProxy waits on nvproxyFD to be closed.
// Used for synchronization during nvproxy setup which is done from the
// non-gofer process.
// This function is a no-op if nvproxyFD is -1.
func (g *goferSyncFDs) syncNVProxy() {
	if g.nvproxyFD < 0 {
		return
	}
	if err := waitForFD(g.nvproxyFD, "nvproxy sync FD"); err != nil {
		util.Fatalf("failed to sync on NVProxy FD: %v", err)
	}
	g.nvproxyFD = -1
}