// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package specutils contains utility functions for working with OCI runtime
// specs.
package specutils

import (
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/MerlinKodo/gvisor/pkg/abi/linux"
	"github.com/MerlinKodo/gvisor/pkg/bits"
	"github.com/MerlinKodo/gvisor/pkg/log"
	"github.com/MerlinKodo/gvisor/pkg/sentry/kernel/auth"
	"github.com/MerlinKodo/gvisor/runsc/config"
	"github.com/MerlinKodo/gvisor/runsc/flag"
	"github.com/cenkalti/backoff"
	"github.com/mohae/deepcopy"
	specs "github.com/opencontainers/runtime-spec/specs-go"
	"golang.org/x/sys/unix"
)

// Spec annotation keys and values that fixSpec looks for.
const (
	// annotationFlagPrefix marks annotations whose remainder names a runsc
	// flag to override with the annotation's value.
	annotationFlagPrefix = "dev.gvisor.flag."
	// annotationSeccomp, followed by the container name, is the key of the
	// per-container seccomp annotation.
	annotationSeccomp = "dev.gvisor.internal.seccomp."
	// annotationSeccompRuntimeDefault is the seccomp annotation value that
	// causes the spec's seccomp rules to be removed.
	annotationSeccompRuntimeDefault = "RuntimeDefault"

	// annotationContainerName is the key of the annotation holding the
	// container name.
	annotationContainerName = "io.kubernetes.cri.container-name"
)

// ExePath must point to runsc binary, which is normally the same binary. It's
// changed in tests that aren't linked in the same binary.
var ExePath = "/proc/self/exe"

// Version is the supported spec version.
56 var Version = specs.Version 57 58 // LogSpecDebug writes the spec in a human-friendly format to the debug log. 59 func LogSpecDebug(orig *specs.Spec, logSeccomp bool) { 60 if !log.IsLogging(log.Debug) { 61 return 62 } 63 64 // Strip down parts of the spec that are not interesting. 65 spec := deepcopy.Copy(orig).(*specs.Spec) 66 if spec.Process != nil { 67 spec.Process.Capabilities = nil 68 } 69 if spec.Linux != nil { 70 if !logSeccomp { 71 spec.Linux.Seccomp = nil 72 } 73 spec.Linux.MaskedPaths = nil 74 spec.Linux.ReadonlyPaths = nil 75 if spec.Linux.Resources != nil { 76 spec.Linux.Resources.Devices = nil 77 } 78 } 79 80 out, err := json.MarshalIndent(spec, "", " ") 81 if err != nil { 82 log.Debugf("Failed to marshal spec: %v", err) 83 return 84 } 85 log.Debugf("Spec:\n%s", out) 86 } 87 88 // ValidateSpec validates that the spec is compatible with runsc. 89 func ValidateSpec(spec *specs.Spec) error { 90 // Mandatory fields. 91 if spec.Process == nil { 92 return fmt.Errorf("Spec.Process must be defined: %+v", spec) 93 } 94 if len(spec.Process.Args) == 0 { 95 return fmt.Errorf("Spec.Process.Arg must be defined: %+v", spec.Process) 96 } 97 if spec.Root == nil { 98 return fmt.Errorf("Spec.Root must be defined: %+v", spec) 99 } 100 if len(spec.Root.Path) == 0 { 101 return fmt.Errorf("Spec.Root.Path must be defined: %+v", spec.Root) 102 } 103 104 // Unsupported fields. 105 if spec.Solaris != nil { 106 return fmt.Errorf("Spec.Solaris is not supported: %+v", spec) 107 } 108 if spec.Windows != nil { 109 return fmt.Errorf("Spec.Windows is not supported: %+v", spec) 110 } 111 if len(spec.Process.SelinuxLabel) != 0 { 112 return fmt.Errorf("SELinux is not supported: %s", spec.Process.SelinuxLabel) 113 } 114 115 // Docker uses AppArmor by default, so just log that it's being ignored. 
116 if spec.Process.ApparmorProfile != "" { 117 log.Warningf("AppArmor profile %q is being ignored", spec.Process.ApparmorProfile) 118 } 119 120 // PR_SET_NO_NEW_PRIVS is assumed to always be set. 121 // See kernel.Task.updateCredsForExecLocked. 122 if !spec.Process.NoNewPrivileges { 123 log.Warningf("noNewPrivileges ignored. PR_SET_NO_NEW_PRIVS is assumed to always be set.") 124 } 125 126 if spec.Linux != nil && spec.Linux.RootfsPropagation != "" { 127 if err := validateRootfsPropagation(spec.Linux.RootfsPropagation); err != nil { 128 return err 129 } 130 } 131 for _, m := range spec.Mounts { 132 if err := validateMount(&m); err != nil { 133 return err 134 } 135 } 136 137 // CRI specifies whether a container should start a new sandbox, or run 138 // another container in an existing sandbox. 139 switch SpecContainerType(spec) { 140 case ContainerTypeContainer: 141 // When starting a container in an existing sandbox, the 142 // sandbox ID must be set. 143 if _, ok := SandboxID(spec); !ok { 144 return fmt.Errorf("spec has container-type of container, but no sandbox ID set") 145 } 146 case ContainerTypeUnknown: 147 return fmt.Errorf("unknown container-type") 148 default: 149 } 150 151 return nil 152 } 153 154 // absPath turns the given path into an absolute path (if it is not already 155 // absolute) by prepending the base path. 156 func absPath(base, rel string) string { 157 if filepath.IsAbs(rel) { 158 return rel 159 } 160 return filepath.Join(base, rel) 161 } 162 163 // OpenSpec opens an OCI runtime spec from the given bundle directory. 164 func OpenSpec(bundleDir string) (*os.File, error) { 165 // The spec file must be named "config.json" inside the bundle directory. 166 return os.Open(filepath.Join(bundleDir, "config.json")) 167 } 168 169 // ReadSpec reads an OCI runtime spec from the given bundle directory. 170 // ReadSpec also normalizes all potential relative paths into absolute 171 // path, e.g. spec.Root.Path, mount.Source. 
172 func ReadSpec(bundleDir string, conf *config.Config) (*specs.Spec, error) { 173 specFile, err := OpenSpec(bundleDir) 174 if err != nil { 175 return nil, fmt.Errorf("error opening spec file %q: %v", filepath.Join(bundleDir, "config.json"), err) 176 } 177 defer specFile.Close() 178 return ReadSpecFromFile(bundleDir, specFile, conf) 179 } 180 181 // ReadSpecFromFile reads an OCI runtime spec from the given file. It also fixes 182 // up the spec so that the rest of the code doesn't need to worry about it. 183 // 1. Normalizes all relative paths into absolute by prepending the bundle 184 // dir to them. 185 // 2. Looks for flag overrides and applies them if any. 186 // 3. Removes seccomp rules if `RuntimeDefault` was used. 187 func ReadSpecFromFile(bundleDir string, specFile *os.File, conf *config.Config) (*specs.Spec, error) { 188 if _, err := specFile.Seek(0, io.SeekStart); err != nil { 189 return nil, fmt.Errorf("error seeking to beginning of file %q: %v", specFile.Name(), err) 190 } 191 specBytes, err := ioutil.ReadAll(specFile) 192 if err != nil { 193 return nil, fmt.Errorf("error reading spec from file %q: %v", specFile.Name(), err) 194 } 195 var spec specs.Spec 196 if err := json.Unmarshal(specBytes, &spec); err != nil { 197 return nil, fmt.Errorf("error unmarshaling spec from file %q: %v\n %s", specFile.Name(), err, string(specBytes)) 198 } 199 if err := ValidateSpec(&spec); err != nil { 200 return nil, err 201 } 202 if err := fixSpec(&spec, bundleDir, conf); err != nil { 203 return nil, err 204 } 205 return &spec, nil 206 } 207 208 func fixSpec(spec *specs.Spec, bundleDir string, conf *config.Config) error { 209 // Turn any relative paths in the spec to absolute by prepending the bundleDir. 210 spec.Root.Path = absPath(bundleDir, spec.Root.Path) 211 for i := range spec.Mounts { 212 m := &spec.Mounts[i] 213 if m.Source != "" { 214 m.Source = absPath(bundleDir, m.Source) 215 } 216 } 217 // Look for config bundle annotations and verify that they exist. 
218 const configBundlePrefix = "dev.gvisor.bundle." 219 var bundles []config.BundleName 220 for annotation, val := range spec.Annotations { 221 if !strings.HasPrefix(annotation, configBundlePrefix) { 222 continue 223 } 224 if val != "true" { 225 return fmt.Errorf("invalid value %q for annotation %q (must be set to 'true' or removed entirely)", val, annotation) 226 } 227 bundleName := config.BundleName(annotation[len(configBundlePrefix):]) 228 if _, exists := config.Bundles[bundleName]; !exists { 229 log.Warningf("Bundle name %q (from annotation %q=%q) does not exist; this bundle may have been deprecated. Skipping.", bundleName, annotation, val) 230 continue 231 } 232 bundles = append(bundles, bundleName) 233 } 234 235 // Apply config bundles, if any. 236 if len(bundles) > 0 { 237 log.Infof("Applying config bundles: %v", bundles) 238 if err := conf.ApplyBundles(flag.CommandLine, bundles...); err != nil { 239 return err 240 } 241 } 242 243 // Check annotation to see if container name is available. 244 var containerName string 245 for key, val := range spec.Annotations { 246 if key == annotationContainerName { 247 containerName = val 248 log.Debugf("Container name: %q", containerName) 249 break 250 } 251 } 252 for annotation, val := range spec.Annotations { 253 if strings.HasPrefix(annotation, annotationFlagPrefix) { 254 // Override flags using annotation to allow customization per sandbox 255 // instance. 256 name := annotation[len(annotationFlagPrefix):] 257 log.Infof("Overriding flag from flag annotation: --%s=%q", name, val) 258 if err := conf.Override(flag.CommandLine, name, val /* force= */, false); err != nil { 259 return err 260 } 261 } else if len(containerName) > 0 { 262 // If we know the container name, then check to see if seccomp 263 // instructions were given to the the container. 
264 if annotation == annotationSeccomp+containerName && val == annotationSeccompRuntimeDefault { 265 // Container seccomp rules are redundant when using gVisor, so remove 266 // them when seccomp is set to RuntimeDefault. 267 if spec.Linux != nil && spec.Linux.Seccomp != nil { 268 log.Debugf("Seccomp is being ignored because annotation %q is set to default.", annotationSeccomp) 269 spec.Linux.Seccomp = nil 270 } 271 } 272 } 273 } 274 return nil 275 } 276 277 // ReadMounts reads mount list from a file. 278 func ReadMounts(f *os.File) ([]specs.Mount, error) { 279 bytes, err := ioutil.ReadAll(f) 280 if err != nil { 281 return nil, fmt.Errorf("error reading mounts: %v", err) 282 } 283 var mounts []specs.Mount 284 if err := json.Unmarshal(bytes, &mounts); err != nil { 285 return nil, fmt.Errorf("error unmarshaling mounts: %v\nJSON bytes:\n%s", err, string(bytes)) 286 } 287 return mounts, nil 288 } 289 290 // Capabilities takes in spec and returns a TaskCapabilities corresponding to 291 // the spec. 292 func Capabilities(enableRaw bool, specCaps *specs.LinuxCapabilities) (*auth.TaskCapabilities, error) { 293 // Strip CAP_NET_RAW from all capability sets if necessary. 294 skipSet := map[linux.Capability]struct{}{} 295 if !enableRaw { 296 skipSet[linux.CAP_NET_RAW] = struct{}{} 297 } 298 299 var caps auth.TaskCapabilities 300 if specCaps != nil { 301 var err error 302 if caps.BoundingCaps, err = capsFromNames(specCaps.Bounding, skipSet); err != nil { 303 return nil, err 304 } 305 if caps.EffectiveCaps, err = capsFromNames(specCaps.Effective, skipSet); err != nil { 306 return nil, err 307 } 308 if caps.InheritableCaps, err = capsFromNames(specCaps.Inheritable, skipSet); err != nil { 309 return nil, err 310 } 311 if caps.PermittedCaps, err = capsFromNames(specCaps.Permitted, skipSet); err != nil { 312 return nil, err 313 } 314 // TODO(gvisor.dev/issue/3166): Support ambient capabilities. 
315 } 316 return &caps, nil 317 } 318 319 // AllCapabilities returns a LinuxCapabilities struct with all capabilities. 320 func AllCapabilities() *specs.LinuxCapabilities { 321 var names []string 322 for n := range capFromName { 323 names = append(names, n) 324 } 325 return &specs.LinuxCapabilities{ 326 Bounding: names, 327 Effective: names, 328 Inheritable: names, 329 Permitted: names, 330 Ambient: names, 331 } 332 } 333 334 // AllCapabilitiesUint64 returns a bitmask containing all capabilities set. 335 func AllCapabilitiesUint64() uint64 { 336 var rv uint64 337 for _, cap := range capFromName { 338 rv |= bits.MaskOf64(int(cap)) 339 } 340 return rv 341 } 342 343 // MergeCapabilities merges the capabilites from first and second. 344 func MergeCapabilities(first, second *specs.LinuxCapabilities) *specs.LinuxCapabilities { 345 return &specs.LinuxCapabilities{ 346 Bounding: mergeUnique(first.Bounding, second.Bounding), 347 Effective: mergeUnique(first.Effective, second.Effective), 348 Inheritable: mergeUnique(first.Inheritable, second.Inheritable), 349 Permitted: mergeUnique(first.Permitted, second.Permitted), 350 Ambient: mergeUnique(first.Ambient, second.Ambient), 351 } 352 } 353 354 // DropCapability removes the specified capability from all capability sets. 
355 func DropCapability(caps *specs.LinuxCapabilities, drop string) { 356 caps.Bounding = remove(caps.Bounding, drop) 357 caps.Effective = remove(caps.Effective, drop) 358 caps.Inheritable = remove(caps.Inheritable, drop) 359 caps.Permitted = remove(caps.Permitted, drop) 360 caps.Ambient = remove(caps.Ambient, drop) 361 } 362 363 func mergeUnique(strSlices ...[]string) []string { 364 common := make(map[string]struct{}) 365 for _, strSlice := range strSlices { 366 for _, s := range strSlice { 367 common[s] = struct{}{} 368 } 369 } 370 371 res := make([]string, 0, len(common)) 372 for s := range common { 373 res = append(res, s) 374 } 375 return res 376 } 377 378 func remove(ss []string, rem string) []string { 379 var out []string 380 for _, s := range ss { 381 if s == rem { 382 continue 383 } 384 out = append(out, s) 385 } 386 return out 387 } 388 389 var capFromName = map[string]linux.Capability{ 390 "CAP_CHOWN": linux.CAP_CHOWN, 391 "CAP_DAC_OVERRIDE": linux.CAP_DAC_OVERRIDE, 392 "CAP_DAC_READ_SEARCH": linux.CAP_DAC_READ_SEARCH, 393 "CAP_FOWNER": linux.CAP_FOWNER, 394 "CAP_FSETID": linux.CAP_FSETID, 395 "CAP_KILL": linux.CAP_KILL, 396 "CAP_SETGID": linux.CAP_SETGID, 397 "CAP_SETUID": linux.CAP_SETUID, 398 "CAP_SETPCAP": linux.CAP_SETPCAP, 399 "CAP_LINUX_IMMUTABLE": linux.CAP_LINUX_IMMUTABLE, 400 "CAP_NET_BIND_SERVICE": linux.CAP_NET_BIND_SERVICE, 401 "CAP_NET_BROADCAST": linux.CAP_NET_BROADCAST, 402 "CAP_NET_ADMIN": linux.CAP_NET_ADMIN, 403 "CAP_NET_RAW": linux.CAP_NET_RAW, 404 "CAP_IPC_LOCK": linux.CAP_IPC_LOCK, 405 "CAP_IPC_OWNER": linux.CAP_IPC_OWNER, 406 "CAP_SYS_MODULE": linux.CAP_SYS_MODULE, 407 "CAP_SYS_RAWIO": linux.CAP_SYS_RAWIO, 408 "CAP_SYS_CHROOT": linux.CAP_SYS_CHROOT, 409 "CAP_SYS_PTRACE": linux.CAP_SYS_PTRACE, 410 "CAP_SYS_PACCT": linux.CAP_SYS_PACCT, 411 "CAP_SYS_ADMIN": linux.CAP_SYS_ADMIN, 412 "CAP_SYS_BOOT": linux.CAP_SYS_BOOT, 413 "CAP_SYS_NICE": linux.CAP_SYS_NICE, 414 "CAP_SYS_RESOURCE": linux.CAP_SYS_RESOURCE, 415 "CAP_SYS_TIME": 
linux.CAP_SYS_TIME, 416 "CAP_SYS_TTY_CONFIG": linux.CAP_SYS_TTY_CONFIG, 417 "CAP_MKNOD": linux.CAP_MKNOD, 418 "CAP_LEASE": linux.CAP_LEASE, 419 "CAP_AUDIT_WRITE": linux.CAP_AUDIT_WRITE, 420 "CAP_AUDIT_CONTROL": linux.CAP_AUDIT_CONTROL, 421 "CAP_SETFCAP": linux.CAP_SETFCAP, 422 "CAP_MAC_OVERRIDE": linux.CAP_MAC_OVERRIDE, 423 "CAP_MAC_ADMIN": linux.CAP_MAC_ADMIN, 424 "CAP_SYSLOG": linux.CAP_SYSLOG, 425 "CAP_WAKE_ALARM": linux.CAP_WAKE_ALARM, 426 "CAP_BLOCK_SUSPEND": linux.CAP_BLOCK_SUSPEND, 427 "CAP_AUDIT_READ": linux.CAP_AUDIT_READ, 428 "CAP_PERFMON": linux.CAP_PERFMON, 429 "CAP_BPF": linux.CAP_BPF, 430 "CAP_CHECKPOINT_RESTORE": linux.CAP_CHECKPOINT_RESTORE, 431 } 432 433 func capsFromNames(names []string, skipSet map[linux.Capability]struct{}) (auth.CapabilitySet, error) { 434 var caps []linux.Capability 435 for _, n := range names { 436 c, ok := capFromName[n] 437 if !ok { 438 return 0, fmt.Errorf("unknown capability %q", n) 439 } 440 // Should we skip this capabilty? 441 if _, ok := skipSet[c]; ok { 442 continue 443 } 444 caps = append(caps, c) 445 } 446 return auth.CapabilitySetOfMany(caps), nil 447 } 448 449 // IsGoferMount returns true if the given mount can be mounted as an external 450 // gofer. 451 func IsGoferMount(m specs.Mount) bool { 452 MaybeConvertToBindMount(&m) 453 return m.Type == "bind" && m.Source != "" 454 } 455 456 // MaybeConvertToBindMount converts mount type to "bind" in case any of the 457 // mount options are either "bind" or "rbind" as required by the OCI spec. 458 // 459 // "For bind mounts (when options include either bind or rbind), the type is a 460 // dummy, often "none" (not listed in /proc/filesystems)." 461 func MaybeConvertToBindMount(m *specs.Mount) { 462 if m.Type == "bind" { 463 return 464 } 465 for _, opt := range m.Options { 466 if opt == "bind" || opt == "rbind" { 467 m.Type = "bind" 468 return 469 } 470 } 471 } 472 473 // WaitForReady waits for a process to become ready. 
The process is ready when 474 // the 'ready' function returns true. It continues to wait if 'ready' returns 475 // false. It returns error on timeout, if the process stops or if 'ready' fails. 476 func WaitForReady(pid int, timeout time.Duration, ready func() (bool, error)) error { 477 b := backoff.NewExponentialBackOff() 478 b.InitialInterval = 1 * time.Millisecond 479 b.MaxInterval = 1 * time.Second 480 b.MaxElapsedTime = timeout 481 482 op := func() error { 483 if ok, err := ready(); err != nil { 484 return backoff.Permanent(err) 485 } else if ok { 486 return nil 487 } 488 489 // Check if the process is still running. 490 // If the process is alive, child is 0 because of the NOHANG option. 491 // If the process has terminated, child equals the process id. 492 var ws unix.WaitStatus 493 var ru unix.Rusage 494 child, err := unix.Wait4(pid, &ws, unix.WNOHANG, &ru) 495 if err != nil { 496 return backoff.Permanent(fmt.Errorf("error waiting for process: %v", err)) 497 } else if child == pid { 498 return backoff.Permanent(fmt.Errorf("process %d has terminated", pid)) 499 } 500 return fmt.Errorf("process %d not running yet", pid) 501 } 502 return backoff.Retry(op, b) 503 } 504 505 // DebugLogFile opens a log file using 'logPattern' as location. If 'logPattern' 506 // ends with '/', it's used as a directory with default file name. 
507 // 'logPattern' can contain variables that are substituted: 508 // - %TIMESTAMP%: is replaced with a timestamp using the following format: 509 // <yyyymmdd-hhmmss.uuuuuu> 510 // - %COMMAND%: is replaced with 'command' 511 // - %TEST%: is replaced with 'test' (omitted by default) 512 func DebugLogFile(logPattern, command, test string) (*os.File, error) { 513 if strings.HasSuffix(logPattern, "/") { 514 // Default format: <debug-log>/runsc.log.<yyyymmdd-hhmmss.uuuuuu>.<command>.txt 515 logPattern += "runsc.log.%TIMESTAMP%.%COMMAND%.txt" 516 } 517 logPattern = strings.Replace(logPattern, "%TIMESTAMP%", time.Now().Format("20060102-150405.000000"), -1) 518 logPattern = strings.Replace(logPattern, "%COMMAND%", command, -1) 519 logPattern = strings.Replace(logPattern, "%TEST%", test, -1) 520 521 dir := filepath.Dir(logPattern) 522 if err := os.MkdirAll(dir, 0775); err != nil { 523 return nil, fmt.Errorf("error creating dir %q: %v", dir, err) 524 } 525 return os.OpenFile(logPattern, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0664) 526 } 527 528 // IsDebugCommand returns true if the command should be debugged or not, based 529 // on the current configuration. 530 func IsDebugCommand(conf *config.Config, command string) bool { 531 if len(conf.DebugCommand) == 0 { 532 // Debug everything by default. 533 return true 534 } 535 filter := conf.DebugCommand 536 rv := true 537 if filter[0] == '!' { 538 // Negate the match, e.g. !boot should log all, but "boot". 539 filter = filter[1:] 540 rv = false 541 } 542 for _, cmd := range strings.Split(filter, ",") { 543 if cmd == command { 544 return rv 545 } 546 } 547 return !rv 548 } 549 550 // SafeSetupAndMount creates the mount point and calls Mount with the given 551 // flags. procPath is the path to procfs. If it is "", procfs is assumed to be 552 // mounted at /proc. 553 func SafeSetupAndMount(src, dst, typ string, flags uint32, procPath string) error { 554 // Create the mount point inside. 
The type must be the same as the source 555 // (file or directory). 556 var isDir bool 557 if typ == "proc" { 558 // Special case, as there is no source directory for proc mounts. 559 isDir = true 560 } else if fi, err := os.Stat(src); err != nil { 561 return fmt.Errorf("stat(%q) failed: %v", src, err) 562 } else { 563 isDir = fi.IsDir() 564 } 565 566 if isDir { 567 // Create the destination directory. 568 if err := os.MkdirAll(dst, 0777); err != nil { 569 return fmt.Errorf("mkdir(%q) failed: %v", dst, err) 570 } 571 } else { 572 // Create the parent destination directory. 573 parent := path.Dir(dst) 574 if err := os.MkdirAll(parent, 0777); err != nil { 575 return fmt.Errorf("mkdir(%q) failed: %v", parent, err) 576 } 577 // Create the destination file if it does not exist. 578 f, err := os.OpenFile(dst, unix.O_CREAT, 0777) 579 if err != nil { 580 return fmt.Errorf("open(%q) failed: %v", dst, err) 581 } 582 f.Close() 583 } 584 585 // Do the mount. 586 if err := SafeMount(src, dst, typ, uintptr(flags), "", procPath); err != nil { 587 return fmt.Errorf("mount(%q, %q, %d) failed: %v", src, dst, flags, err) 588 } 589 return nil 590 } 591 592 // ErrSymlinkMount is returned by SafeMount when the mount destination is found 593 // to be a symlink. 594 type ErrSymlinkMount struct { 595 error 596 } 597 598 // SafeMount is like unix.Mount, but will fail if dst is a symlink. procPath is 599 // the path to procfs. If it is "", procfs is assumed to be mounted at /proc. 600 // 601 // SafeMount can fail when dst contains a symlink. However, it is called in the 602 // normal case with a destination consisting of a known root (/proc/root) and 603 // symlink-free path (from resolveSymlink). 604 func SafeMount(src, dst, fstype string, flags uintptr, data, procPath string) error { 605 // Open the destination. 
606 fd, err := unix.Open(dst, unix.O_PATH|unix.O_CLOEXEC, 0) 607 if err != nil { 608 return fmt.Errorf("failed to safely mount: Open(%s, _, _): %w", dst, err) 609 } 610 defer unix.Close(fd) 611 612 // Use /proc/self/fd/ to verify that we opened the intended destination. This 613 // guards against dst being a symlink, in which case we could accidentally 614 // mount over the symlink's target. 615 if procPath == "" { 616 procPath = "/proc" 617 } 618 safePath := filepath.Join(procPath, "self/fd", strconv.Itoa(fd)) 619 target, err := os.Readlink(safePath) 620 if err != nil { 621 return fmt.Errorf("failed to safely mount: Readlink(%s): %w", safePath, err) 622 } 623 if dst != target { 624 return &ErrSymlinkMount{fmt.Errorf("failed to safely mount: expected to open %s, but found %s", dst, target)} 625 } 626 627 return unix.Mount(src, safePath, fstype, flags, data) 628 } 629 630 // ContainsStr returns true if 'str' is inside 'strs'. 631 func ContainsStr(strs []string, str string) bool { 632 for _, s := range strs { 633 if s == str { 634 return true 635 } 636 } 637 return false 638 } 639 640 // RetryEintr retries the function until an error different than EINTR is 641 // returned. 642 func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) { 643 for { 644 r1, r2, err := f() 645 if err != unix.EINTR { 646 return r1, r2, err 647 } 648 } 649 } 650 651 // GetOOMScoreAdj reads the given process' oom_score_adj 652 func GetOOMScoreAdj(pid int) (int, error) { 653 data, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/oom_score_adj", pid)) 654 if err != nil { 655 return 0, err 656 } 657 return strconv.Atoi(strings.TrimSpace(string(data))) 658 } 659 660 // EnvVar looks for a variable value in the env slice assuming the following 661 // format: "NAME=VALUE". If a variable is defined multiple times, the last 662 // value is used. 
func EnvVar(env []string, name string) (string, bool) {
	// Resolving first validates every entry and collapses duplicates, so only
	// the last definition of each name is left to search.
	resolved, err := ResolveEnvs(env)
	if err != nil {
		// A malformed entry anywhere in env makes the lookup fail.
		return "", false
	}
	prefix := name + "="
	for _, e := range resolved {
		if strings.HasPrefix(e, prefix) {
			return strings.TrimPrefix(e, prefix), true
		}
	}
	return "", false
}

// ResolveEnvs transforms lists of environment variables into a single list of
// environment variables. If a variable is defined multiple times, the last
// value is used.
//
// The result is deterministic: each variable appears once, at the position
// where its name was first seen. An entry without '=' is reported as an
// error. The result is never nil on success, even when there is no input.
func ResolveEnvs(envs ...[]string) ([]string, error) {
	// values holds the latest value per name and removes duplicates. order
	// records each name the first time it shows up, because ranging over the
	// map would yield a different order on every run.
	values := make(map[string]string)
	var order []string
	for _, env := range envs {
		for _, str := range env {
			parts := strings.SplitN(str, "=", 2)
			if len(parts) != 2 {
				return nil, fmt.Errorf("invalid variable: %s", str)
			}
			if _, seen := values[parts[0]]; !seen {
				order = append(order, parts[0])
			}
			values[parts[0]] = parts[1]
		}
	}
	// Reassemble the variables into a list of the form NAME=VALUE.
	resolved := make([]string, 0, len(order))
	for _, name := range order {
		resolved = append(resolved, name+"="+values[name])
	}
	return resolved, nil
}

// FaqErrorMsg returns an error message pointing to the FAQ: msg followed by a
// link to the given anchor of https://gvisor.dev/faq.
func FaqErrorMsg(anchor, msg string) string {
	return fmt.Sprintf("%s; see https://gvisor.dev/faq#%s for more details", msg, anchor)
}