github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/runsc/specutils/specutils.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package specutils contains utility functions for working with OCI runtime
    16  // specs.
    17  package specutils
    18  
    19  import (
    20  	"encoding/json"
    21  	"fmt"
    22  	"io"
    23  	"io/ioutil"
    24  	"os"
    25  	"path"
    26  	"path/filepath"
    27  	"strconv"
    28  	"strings"
    29  	"time"
    30  
    31  	"github.com/MerlinKodo/gvisor/pkg/abi/linux"
    32  	"github.com/MerlinKodo/gvisor/pkg/bits"
    33  	"github.com/MerlinKodo/gvisor/pkg/log"
    34  	"github.com/MerlinKodo/gvisor/pkg/sentry/kernel/auth"
    35  	"github.com/MerlinKodo/gvisor/runsc/config"
    36  	"github.com/MerlinKodo/gvisor/runsc/flag"
    37  	"github.com/cenkalti/backoff"
    38  	"github.com/mohae/deepcopy"
    39  	specs "github.com/opencontainers/runtime-spec/specs-go"
    40  	"golang.org/x/sys/unix"
    41  )
    42  
const (
	// annotationFlagPrefix is the prefix of annotations that override a
	// runsc flag; the remainder of the annotation key is the flag name.
	annotationFlagPrefix            = "dev.gvisor.flag."
	// annotationSeccomp is the prefix of the per-container seccomp
	// annotation; the container name is appended to form the full key.
	annotationSeccomp               = "dev.gvisor.internal.seccomp."
	// annotationSeccompRuntimeDefault is the seccomp annotation value for
	// which the seccomp rules in the spec are dropped.
	annotationSeccompRuntimeDefault = "RuntimeDefault"

	// annotationContainerName is the annotation key whose value is the
	// container name.
	annotationContainerName = "io.kubernetes.cri.container-name"
)
    50  
// ExePath must point to the runsc binary, which is normally the currently
// running binary (hence the /proc/self/exe default). It is changed in tests
// that aren't linked in the same binary.
var ExePath = "/proc/self/exe"
    54  
// Version is the supported OCI runtime spec version, taken from the
// runtime-spec package.
var Version = specs.Version
    57  
    58  // LogSpecDebug writes the spec in a human-friendly format to the debug log.
    59  func LogSpecDebug(orig *specs.Spec, logSeccomp bool) {
    60  	if !log.IsLogging(log.Debug) {
    61  		return
    62  	}
    63  
    64  	// Strip down parts of the spec that are not interesting.
    65  	spec := deepcopy.Copy(orig).(*specs.Spec)
    66  	if spec.Process != nil {
    67  		spec.Process.Capabilities = nil
    68  	}
    69  	if spec.Linux != nil {
    70  		if !logSeccomp {
    71  			spec.Linux.Seccomp = nil
    72  		}
    73  		spec.Linux.MaskedPaths = nil
    74  		spec.Linux.ReadonlyPaths = nil
    75  		if spec.Linux.Resources != nil {
    76  			spec.Linux.Resources.Devices = nil
    77  		}
    78  	}
    79  
    80  	out, err := json.MarshalIndent(spec, "", "  ")
    81  	if err != nil {
    82  		log.Debugf("Failed to marshal spec: %v", err)
    83  		return
    84  	}
    85  	log.Debugf("Spec:\n%s", out)
    86  }
    87  
    88  // ValidateSpec validates that the spec is compatible with runsc.
    89  func ValidateSpec(spec *specs.Spec) error {
    90  	// Mandatory fields.
    91  	if spec.Process == nil {
    92  		return fmt.Errorf("Spec.Process must be defined: %+v", spec)
    93  	}
    94  	if len(spec.Process.Args) == 0 {
    95  		return fmt.Errorf("Spec.Process.Arg must be defined: %+v", spec.Process)
    96  	}
    97  	if spec.Root == nil {
    98  		return fmt.Errorf("Spec.Root must be defined: %+v", spec)
    99  	}
   100  	if len(spec.Root.Path) == 0 {
   101  		return fmt.Errorf("Spec.Root.Path must be defined: %+v", spec.Root)
   102  	}
   103  
   104  	// Unsupported fields.
   105  	if spec.Solaris != nil {
   106  		return fmt.Errorf("Spec.Solaris is not supported: %+v", spec)
   107  	}
   108  	if spec.Windows != nil {
   109  		return fmt.Errorf("Spec.Windows is not supported: %+v", spec)
   110  	}
   111  	if len(spec.Process.SelinuxLabel) != 0 {
   112  		return fmt.Errorf("SELinux is not supported: %s", spec.Process.SelinuxLabel)
   113  	}
   114  
   115  	// Docker uses AppArmor by default, so just log that it's being ignored.
   116  	if spec.Process.ApparmorProfile != "" {
   117  		log.Warningf("AppArmor profile %q is being ignored", spec.Process.ApparmorProfile)
   118  	}
   119  
   120  	// PR_SET_NO_NEW_PRIVS is assumed to always be set.
   121  	// See kernel.Task.updateCredsForExecLocked.
   122  	if !spec.Process.NoNewPrivileges {
   123  		log.Warningf("noNewPrivileges ignored. PR_SET_NO_NEW_PRIVS is assumed to always be set.")
   124  	}
   125  
   126  	if spec.Linux != nil && spec.Linux.RootfsPropagation != "" {
   127  		if err := validateRootfsPropagation(spec.Linux.RootfsPropagation); err != nil {
   128  			return err
   129  		}
   130  	}
   131  	for _, m := range spec.Mounts {
   132  		if err := validateMount(&m); err != nil {
   133  			return err
   134  		}
   135  	}
   136  
   137  	// CRI specifies whether a container should start a new sandbox, or run
   138  	// another container in an existing sandbox.
   139  	switch SpecContainerType(spec) {
   140  	case ContainerTypeContainer:
   141  		// When starting a container in an existing sandbox, the
   142  		// sandbox ID must be set.
   143  		if _, ok := SandboxID(spec); !ok {
   144  			return fmt.Errorf("spec has container-type of container, but no sandbox ID set")
   145  		}
   146  	case ContainerTypeUnknown:
   147  		return fmt.Errorf("unknown container-type")
   148  	default:
   149  	}
   150  
   151  	return nil
   152  }
   153  
   154  // absPath turns the given path into an absolute path (if it is not already
   155  // absolute) by prepending the base path.
   156  func absPath(base, rel string) string {
   157  	if filepath.IsAbs(rel) {
   158  		return rel
   159  	}
   160  	return filepath.Join(base, rel)
   161  }
   162  
   163  // OpenSpec opens an OCI runtime spec from the given bundle directory.
   164  func OpenSpec(bundleDir string) (*os.File, error) {
   165  	// The spec file must be named "config.json" inside the bundle directory.
   166  	return os.Open(filepath.Join(bundleDir, "config.json"))
   167  }
   168  
   169  // ReadSpec reads an OCI runtime spec from the given bundle directory.
   170  // ReadSpec also normalizes all potential relative paths into absolute
   171  // path, e.g. spec.Root.Path, mount.Source.
   172  func ReadSpec(bundleDir string, conf *config.Config) (*specs.Spec, error) {
   173  	specFile, err := OpenSpec(bundleDir)
   174  	if err != nil {
   175  		return nil, fmt.Errorf("error opening spec file %q: %v", filepath.Join(bundleDir, "config.json"), err)
   176  	}
   177  	defer specFile.Close()
   178  	return ReadSpecFromFile(bundleDir, specFile, conf)
   179  }
   180  
   181  // ReadSpecFromFile reads an OCI runtime spec from the given file. It also fixes
   182  // up the spec so that the rest of the code doesn't need to worry about it.
   183  //  1. Normalizes all relative paths into absolute by prepending the bundle
   184  //     dir to them.
   185  //  2. Looks for flag overrides and applies them if any.
   186  //  3. Removes seccomp rules if `RuntimeDefault` was used.
   187  func ReadSpecFromFile(bundleDir string, specFile *os.File, conf *config.Config) (*specs.Spec, error) {
   188  	if _, err := specFile.Seek(0, io.SeekStart); err != nil {
   189  		return nil, fmt.Errorf("error seeking to beginning of file %q: %v", specFile.Name(), err)
   190  	}
   191  	specBytes, err := ioutil.ReadAll(specFile)
   192  	if err != nil {
   193  		return nil, fmt.Errorf("error reading spec from file %q: %v", specFile.Name(), err)
   194  	}
   195  	var spec specs.Spec
   196  	if err := json.Unmarshal(specBytes, &spec); err != nil {
   197  		return nil, fmt.Errorf("error unmarshaling spec from file %q: %v\n %s", specFile.Name(), err, string(specBytes))
   198  	}
   199  	if err := ValidateSpec(&spec); err != nil {
   200  		return nil, err
   201  	}
   202  	if err := fixSpec(&spec, bundleDir, conf); err != nil {
   203  		return nil, err
   204  	}
   205  	return &spec, nil
   206  }
   207  
   208  func fixSpec(spec *specs.Spec, bundleDir string, conf *config.Config) error {
   209  	// Turn any relative paths in the spec to absolute by prepending the bundleDir.
   210  	spec.Root.Path = absPath(bundleDir, spec.Root.Path)
   211  	for i := range spec.Mounts {
   212  		m := &spec.Mounts[i]
   213  		if m.Source != "" {
   214  			m.Source = absPath(bundleDir, m.Source)
   215  		}
   216  	}
   217  	// Look for config bundle annotations and verify that they exist.
   218  	const configBundlePrefix = "dev.gvisor.bundle."
   219  	var bundles []config.BundleName
   220  	for annotation, val := range spec.Annotations {
   221  		if !strings.HasPrefix(annotation, configBundlePrefix) {
   222  			continue
   223  		}
   224  		if val != "true" {
   225  			return fmt.Errorf("invalid value %q for annotation %q (must be set to 'true' or removed entirely)", val, annotation)
   226  		}
   227  		bundleName := config.BundleName(annotation[len(configBundlePrefix):])
   228  		if _, exists := config.Bundles[bundleName]; !exists {
   229  			log.Warningf("Bundle name %q (from annotation %q=%q) does not exist; this bundle may have been deprecated. Skipping.", bundleName, annotation, val)
   230  			continue
   231  		}
   232  		bundles = append(bundles, bundleName)
   233  	}
   234  
   235  	// Apply config bundles, if any.
   236  	if len(bundles) > 0 {
   237  		log.Infof("Applying config bundles: %v", bundles)
   238  		if err := conf.ApplyBundles(flag.CommandLine, bundles...); err != nil {
   239  			return err
   240  		}
   241  	}
   242  
   243  	// Check annotation to see if container name is available.
   244  	var containerName string
   245  	for key, val := range spec.Annotations {
   246  		if key == annotationContainerName {
   247  			containerName = val
   248  			log.Debugf("Container name: %q", containerName)
   249  			break
   250  		}
   251  	}
   252  	for annotation, val := range spec.Annotations {
   253  		if strings.HasPrefix(annotation, annotationFlagPrefix) {
   254  			// Override flags using annotation to allow customization per sandbox
   255  			// instance.
   256  			name := annotation[len(annotationFlagPrefix):]
   257  			log.Infof("Overriding flag from flag annotation: --%s=%q", name, val)
   258  			if err := conf.Override(flag.CommandLine, name, val /* force= */, false); err != nil {
   259  				return err
   260  			}
   261  		} else if len(containerName) > 0 {
   262  			// If we know the container name, then check to see if seccomp
   263  			// instructions were given to the the container.
   264  			if annotation == annotationSeccomp+containerName && val == annotationSeccompRuntimeDefault {
   265  				// Container seccomp rules are redundant when using gVisor, so remove
   266  				// them when seccomp is set to RuntimeDefault.
   267  				if spec.Linux != nil && spec.Linux.Seccomp != nil {
   268  					log.Debugf("Seccomp is being ignored because annotation %q is set to default.", annotationSeccomp)
   269  					spec.Linux.Seccomp = nil
   270  				}
   271  			}
   272  		}
   273  	}
   274  	return nil
   275  }
   276  
   277  // ReadMounts reads mount list from a file.
   278  func ReadMounts(f *os.File) ([]specs.Mount, error) {
   279  	bytes, err := ioutil.ReadAll(f)
   280  	if err != nil {
   281  		return nil, fmt.Errorf("error reading mounts: %v", err)
   282  	}
   283  	var mounts []specs.Mount
   284  	if err := json.Unmarshal(bytes, &mounts); err != nil {
   285  		return nil, fmt.Errorf("error unmarshaling mounts: %v\nJSON bytes:\n%s", err, string(bytes))
   286  	}
   287  	return mounts, nil
   288  }
   289  
   290  // Capabilities takes in spec and returns a TaskCapabilities corresponding to
   291  // the spec.
   292  func Capabilities(enableRaw bool, specCaps *specs.LinuxCapabilities) (*auth.TaskCapabilities, error) {
   293  	// Strip CAP_NET_RAW from all capability sets if necessary.
   294  	skipSet := map[linux.Capability]struct{}{}
   295  	if !enableRaw {
   296  		skipSet[linux.CAP_NET_RAW] = struct{}{}
   297  	}
   298  
   299  	var caps auth.TaskCapabilities
   300  	if specCaps != nil {
   301  		var err error
   302  		if caps.BoundingCaps, err = capsFromNames(specCaps.Bounding, skipSet); err != nil {
   303  			return nil, err
   304  		}
   305  		if caps.EffectiveCaps, err = capsFromNames(specCaps.Effective, skipSet); err != nil {
   306  			return nil, err
   307  		}
   308  		if caps.InheritableCaps, err = capsFromNames(specCaps.Inheritable, skipSet); err != nil {
   309  			return nil, err
   310  		}
   311  		if caps.PermittedCaps, err = capsFromNames(specCaps.Permitted, skipSet); err != nil {
   312  			return nil, err
   313  		}
   314  		// TODO(gvisor.dev/issue/3166): Support ambient capabilities.
   315  	}
   316  	return &caps, nil
   317  }
   318  
   319  // AllCapabilities returns a LinuxCapabilities struct with all capabilities.
   320  func AllCapabilities() *specs.LinuxCapabilities {
   321  	var names []string
   322  	for n := range capFromName {
   323  		names = append(names, n)
   324  	}
   325  	return &specs.LinuxCapabilities{
   326  		Bounding:    names,
   327  		Effective:   names,
   328  		Inheritable: names,
   329  		Permitted:   names,
   330  		Ambient:     names,
   331  	}
   332  }
   333  
   334  // AllCapabilitiesUint64 returns a bitmask containing all capabilities set.
   335  func AllCapabilitiesUint64() uint64 {
   336  	var rv uint64
   337  	for _, cap := range capFromName {
   338  		rv |= bits.MaskOf64(int(cap))
   339  	}
   340  	return rv
   341  }
   342  
   343  // MergeCapabilities merges the capabilites from first and second.
   344  func MergeCapabilities(first, second *specs.LinuxCapabilities) *specs.LinuxCapabilities {
   345  	return &specs.LinuxCapabilities{
   346  		Bounding:    mergeUnique(first.Bounding, second.Bounding),
   347  		Effective:   mergeUnique(first.Effective, second.Effective),
   348  		Inheritable: mergeUnique(first.Inheritable, second.Inheritable),
   349  		Permitted:   mergeUnique(first.Permitted, second.Permitted),
   350  		Ambient:     mergeUnique(first.Ambient, second.Ambient),
   351  	}
   352  }
   353  
   354  // DropCapability removes the specified capability from all capability sets.
   355  func DropCapability(caps *specs.LinuxCapabilities, drop string) {
   356  	caps.Bounding = remove(caps.Bounding, drop)
   357  	caps.Effective = remove(caps.Effective, drop)
   358  	caps.Inheritable = remove(caps.Inheritable, drop)
   359  	caps.Permitted = remove(caps.Permitted, drop)
   360  	caps.Ambient = remove(caps.Ambient, drop)
   361  }
   362  
   363  func mergeUnique(strSlices ...[]string) []string {
   364  	common := make(map[string]struct{})
   365  	for _, strSlice := range strSlices {
   366  		for _, s := range strSlice {
   367  			common[s] = struct{}{}
   368  		}
   369  	}
   370  
   371  	res := make([]string, 0, len(common))
   372  	for s := range common {
   373  		res = append(res, s)
   374  	}
   375  	return res
   376  }
   377  
   378  func remove(ss []string, rem string) []string {
   379  	var out []string
   380  	for _, s := range ss {
   381  		if s == rem {
   382  			continue
   383  		}
   384  		out = append(out, s)
   385  	}
   386  	return out
   387  }
   388  
// capFromName maps capability names, as they are spelled in the capability
// lists of the spec, to the corresponding linux.Capability value. It is the
// lookup table used to translate and to enumerate capabilities in this file.
var capFromName = map[string]linux.Capability{
	"CAP_CHOWN":              linux.CAP_CHOWN,
	"CAP_DAC_OVERRIDE":       linux.CAP_DAC_OVERRIDE,
	"CAP_DAC_READ_SEARCH":    linux.CAP_DAC_READ_SEARCH,
	"CAP_FOWNER":             linux.CAP_FOWNER,
	"CAP_FSETID":             linux.CAP_FSETID,
	"CAP_KILL":               linux.CAP_KILL,
	"CAP_SETGID":             linux.CAP_SETGID,
	"CAP_SETUID":             linux.CAP_SETUID,
	"CAP_SETPCAP":            linux.CAP_SETPCAP,
	"CAP_LINUX_IMMUTABLE":    linux.CAP_LINUX_IMMUTABLE,
	"CAP_NET_BIND_SERVICE":   linux.CAP_NET_BIND_SERVICE,
	"CAP_NET_BROADCAST":      linux.CAP_NET_BROADCAST,
	"CAP_NET_ADMIN":          linux.CAP_NET_ADMIN,
	"CAP_NET_RAW":            linux.CAP_NET_RAW,
	"CAP_IPC_LOCK":           linux.CAP_IPC_LOCK,
	"CAP_IPC_OWNER":          linux.CAP_IPC_OWNER,
	"CAP_SYS_MODULE":         linux.CAP_SYS_MODULE,
	"CAP_SYS_RAWIO":          linux.CAP_SYS_RAWIO,
	"CAP_SYS_CHROOT":         linux.CAP_SYS_CHROOT,
	"CAP_SYS_PTRACE":         linux.CAP_SYS_PTRACE,
	"CAP_SYS_PACCT":          linux.CAP_SYS_PACCT,
	"CAP_SYS_ADMIN":          linux.CAP_SYS_ADMIN,
	"CAP_SYS_BOOT":           linux.CAP_SYS_BOOT,
	"CAP_SYS_NICE":           linux.CAP_SYS_NICE,
	"CAP_SYS_RESOURCE":       linux.CAP_SYS_RESOURCE,
	"CAP_SYS_TIME":           linux.CAP_SYS_TIME,
	"CAP_SYS_TTY_CONFIG":     linux.CAP_SYS_TTY_CONFIG,
	"CAP_MKNOD":              linux.CAP_MKNOD,
	"CAP_LEASE":              linux.CAP_LEASE,
	"CAP_AUDIT_WRITE":        linux.CAP_AUDIT_WRITE,
	"CAP_AUDIT_CONTROL":      linux.CAP_AUDIT_CONTROL,
	"CAP_SETFCAP":            linux.CAP_SETFCAP,
	"CAP_MAC_OVERRIDE":       linux.CAP_MAC_OVERRIDE,
	"CAP_MAC_ADMIN":          linux.CAP_MAC_ADMIN,
	"CAP_SYSLOG":             linux.CAP_SYSLOG,
	"CAP_WAKE_ALARM":         linux.CAP_WAKE_ALARM,
	"CAP_BLOCK_SUSPEND":      linux.CAP_BLOCK_SUSPEND,
	"CAP_AUDIT_READ":         linux.CAP_AUDIT_READ,
	"CAP_PERFMON":            linux.CAP_PERFMON,
	"CAP_BPF":                linux.CAP_BPF,
	"CAP_CHECKPOINT_RESTORE": linux.CAP_CHECKPOINT_RESTORE,
}
   432  
   433  func capsFromNames(names []string, skipSet map[linux.Capability]struct{}) (auth.CapabilitySet, error) {
   434  	var caps []linux.Capability
   435  	for _, n := range names {
   436  		c, ok := capFromName[n]
   437  		if !ok {
   438  			return 0, fmt.Errorf("unknown capability %q", n)
   439  		}
   440  		// Should we skip this capabilty?
   441  		if _, ok := skipSet[c]; ok {
   442  			continue
   443  		}
   444  		caps = append(caps, c)
   445  	}
   446  	return auth.CapabilitySetOfMany(caps), nil
   447  }
   448  
   449  // IsGoferMount returns true if the given mount can be mounted as an external
   450  // gofer.
   451  func IsGoferMount(m specs.Mount) bool {
   452  	MaybeConvertToBindMount(&m)
   453  	return m.Type == "bind" && m.Source != ""
   454  }
   455  
   456  // MaybeConvertToBindMount converts mount type to "bind" in case any of the
   457  // mount options are either "bind" or "rbind" as required by the OCI spec.
   458  //
   459  // "For bind mounts (when options include either bind or rbind), the type is a
   460  // dummy, often "none" (not listed in /proc/filesystems)."
   461  func MaybeConvertToBindMount(m *specs.Mount) {
   462  	if m.Type == "bind" {
   463  		return
   464  	}
   465  	for _, opt := range m.Options {
   466  		if opt == "bind" || opt == "rbind" {
   467  			m.Type = "bind"
   468  			return
   469  		}
   470  	}
   471  }
   472  
   473  // WaitForReady waits for a process to become ready. The process is ready when
   474  // the 'ready' function returns true. It continues to wait if 'ready' returns
   475  // false. It returns error on timeout, if the process stops or if 'ready' fails.
   476  func WaitForReady(pid int, timeout time.Duration, ready func() (bool, error)) error {
   477  	b := backoff.NewExponentialBackOff()
   478  	b.InitialInterval = 1 * time.Millisecond
   479  	b.MaxInterval = 1 * time.Second
   480  	b.MaxElapsedTime = timeout
   481  
   482  	op := func() error {
   483  		if ok, err := ready(); err != nil {
   484  			return backoff.Permanent(err)
   485  		} else if ok {
   486  			return nil
   487  		}
   488  
   489  		// Check if the process is still running.
   490  		// If the process is alive, child is 0 because of the NOHANG option.
   491  		// If the process has terminated, child equals the process id.
   492  		var ws unix.WaitStatus
   493  		var ru unix.Rusage
   494  		child, err := unix.Wait4(pid, &ws, unix.WNOHANG, &ru)
   495  		if err != nil {
   496  			return backoff.Permanent(fmt.Errorf("error waiting for process: %v", err))
   497  		} else if child == pid {
   498  			return backoff.Permanent(fmt.Errorf("process %d has terminated", pid))
   499  		}
   500  		return fmt.Errorf("process %d not running yet", pid)
   501  	}
   502  	return backoff.Retry(op, b)
   503  }
   504  
   505  // DebugLogFile opens a log file using 'logPattern' as location. If 'logPattern'
   506  // ends with '/', it's used as a directory with default file name.
   507  // 'logPattern' can contain variables that are substituted:
   508  //   - %TIMESTAMP%: is replaced with a timestamp using the following format:
   509  //     <yyyymmdd-hhmmss.uuuuuu>
   510  //   - %COMMAND%: is replaced with 'command'
   511  //   - %TEST%: is replaced with 'test' (omitted by default)
   512  func DebugLogFile(logPattern, command, test string) (*os.File, error) {
   513  	if strings.HasSuffix(logPattern, "/") {
   514  		// Default format: <debug-log>/runsc.log.<yyyymmdd-hhmmss.uuuuuu>.<command>.txt
   515  		logPattern += "runsc.log.%TIMESTAMP%.%COMMAND%.txt"
   516  	}
   517  	logPattern = strings.Replace(logPattern, "%TIMESTAMP%", time.Now().Format("20060102-150405.000000"), -1)
   518  	logPattern = strings.Replace(logPattern, "%COMMAND%", command, -1)
   519  	logPattern = strings.Replace(logPattern, "%TEST%", test, -1)
   520  
   521  	dir := filepath.Dir(logPattern)
   522  	if err := os.MkdirAll(dir, 0775); err != nil {
   523  		return nil, fmt.Errorf("error creating dir %q: %v", dir, err)
   524  	}
   525  	return os.OpenFile(logPattern, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0664)
   526  }
   527  
   528  // IsDebugCommand returns true if the command should be debugged or not, based
   529  // on the current configuration.
   530  func IsDebugCommand(conf *config.Config, command string) bool {
   531  	if len(conf.DebugCommand) == 0 {
   532  		// Debug everything by default.
   533  		return true
   534  	}
   535  	filter := conf.DebugCommand
   536  	rv := true
   537  	if filter[0] == '!' {
   538  		// Negate the match, e.g. !boot should log all, but "boot".
   539  		filter = filter[1:]
   540  		rv = false
   541  	}
   542  	for _, cmd := range strings.Split(filter, ",") {
   543  		if cmd == command {
   544  			return rv
   545  		}
   546  	}
   547  	return !rv
   548  }
   549  
   550  // SafeSetupAndMount creates the mount point and calls Mount with the given
   551  // flags. procPath is the path to procfs. If it is "", procfs is assumed to be
   552  // mounted at /proc.
   553  func SafeSetupAndMount(src, dst, typ string, flags uint32, procPath string) error {
   554  	// Create the mount point inside. The type must be the same as the source
   555  	// (file or directory).
   556  	var isDir bool
   557  	if typ == "proc" {
   558  		// Special case, as there is no source directory for proc mounts.
   559  		isDir = true
   560  	} else if fi, err := os.Stat(src); err != nil {
   561  		return fmt.Errorf("stat(%q) failed: %v", src, err)
   562  	} else {
   563  		isDir = fi.IsDir()
   564  	}
   565  
   566  	if isDir {
   567  		// Create the destination directory.
   568  		if err := os.MkdirAll(dst, 0777); err != nil {
   569  			return fmt.Errorf("mkdir(%q) failed: %v", dst, err)
   570  		}
   571  	} else {
   572  		// Create the parent destination directory.
   573  		parent := path.Dir(dst)
   574  		if err := os.MkdirAll(parent, 0777); err != nil {
   575  			return fmt.Errorf("mkdir(%q) failed: %v", parent, err)
   576  		}
   577  		// Create the destination file if it does not exist.
   578  		f, err := os.OpenFile(dst, unix.O_CREAT, 0777)
   579  		if err != nil {
   580  			return fmt.Errorf("open(%q) failed: %v", dst, err)
   581  		}
   582  		f.Close()
   583  	}
   584  
   585  	// Do the mount.
   586  	if err := SafeMount(src, dst, typ, uintptr(flags), "", procPath); err != nil {
   587  		return fmt.Errorf("mount(%q, %q, %d) failed: %v", src, dst, flags, err)
   588  	}
   589  	return nil
   590  }
   591  
// ErrSymlinkMount is returned by SafeMount when the mount destination is found
// to be a symlink. It embeds the underlying error, whose Error method supplies
// the message.
type ErrSymlinkMount struct {
	error
}
   597  
   598  // SafeMount is like unix.Mount, but will fail if dst is a symlink. procPath is
   599  // the path to procfs. If it is "", procfs is assumed to be mounted at /proc.
   600  //
   601  // SafeMount can fail when dst contains a symlink. However, it is called in the
   602  // normal case with a destination consisting of a known root (/proc/root) and
   603  // symlink-free path (from resolveSymlink).
   604  func SafeMount(src, dst, fstype string, flags uintptr, data, procPath string) error {
   605  	// Open the destination.
   606  	fd, err := unix.Open(dst, unix.O_PATH|unix.O_CLOEXEC, 0)
   607  	if err != nil {
   608  		return fmt.Errorf("failed to safely mount: Open(%s, _, _): %w", dst, err)
   609  	}
   610  	defer unix.Close(fd)
   611  
   612  	// Use /proc/self/fd/ to verify that we opened the intended destination. This
   613  	// guards against dst being a symlink, in which case we could accidentally
   614  	// mount over the symlink's target.
   615  	if procPath == "" {
   616  		procPath = "/proc"
   617  	}
   618  	safePath := filepath.Join(procPath, "self/fd", strconv.Itoa(fd))
   619  	target, err := os.Readlink(safePath)
   620  	if err != nil {
   621  		return fmt.Errorf("failed to safely mount: Readlink(%s): %w", safePath, err)
   622  	}
   623  	if dst != target {
   624  		return &ErrSymlinkMount{fmt.Errorf("failed to safely mount: expected to open %s, but found %s", dst, target)}
   625  	}
   626  
   627  	return unix.Mount(src, safePath, fstype, flags, data)
   628  }
   629  
   630  // ContainsStr returns true if 'str' is inside 'strs'.
   631  func ContainsStr(strs []string, str string) bool {
   632  	for _, s := range strs {
   633  		if s == str {
   634  			return true
   635  		}
   636  	}
   637  	return false
   638  }
   639  
   640  // RetryEintr retries the function until an error different than EINTR is
   641  // returned.
   642  func RetryEintr(f func() (uintptr, uintptr, error)) (uintptr, uintptr, error) {
   643  	for {
   644  		r1, r2, err := f()
   645  		if err != unix.EINTR {
   646  			return r1, r2, err
   647  		}
   648  	}
   649  }
   650  
   651  // GetOOMScoreAdj reads the given process' oom_score_adj
   652  func GetOOMScoreAdj(pid int) (int, error) {
   653  	data, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/oom_score_adj", pid))
   654  	if err != nil {
   655  		return 0, err
   656  	}
   657  	return strconv.Atoi(strings.TrimSpace(string(data)))
   658  }
   659  
   660  // EnvVar looks for a variable value in the env slice assuming the following
   661  // format: "NAME=VALUE". If a variable is defined multiple times, the last
   662  // value is used.
   663  func EnvVar(env []string, name string) (string, bool) {
   664  	var err error
   665  	env, err = ResolveEnvs(env)
   666  	if err != nil {
   667  		return "", false
   668  	}
   669  	prefix := name + "="
   670  	for _, e := range env {
   671  		if strings.HasPrefix(e, prefix) {
   672  			return strings.TrimPrefix(e, prefix), true
   673  		}
   674  	}
   675  	return "", false
   676  }
   677  
   678  // ResolveEnvs transforms lists of environment variables into a single list of
   679  // environment variables. If a variable is defined multiple times, the last
   680  // value is used.
   681  func ResolveEnvs(envs ...[]string) ([]string, error) {
   682  	// First create a map of variable names to values. This removes any
   683  	// duplicates.
   684  	envMap := make(map[string]string)
   685  	for _, env := range envs {
   686  		for _, str := range env {
   687  			parts := strings.SplitN(str, "=", 2)
   688  			if len(parts) != 2 {
   689  				return nil, fmt.Errorf("invalid variable: %s", str)
   690  			}
   691  			envMap[parts[0]] = parts[1]
   692  		}
   693  	}
   694  	// Reassemble envMap into a list of environment variables of the form
   695  	// NAME=VALUE.
   696  	env := make([]string, 0, len(envMap))
   697  	for k, v := range envMap {
   698  		env = append(env, fmt.Sprintf("%s=%s", k, v))
   699  	}
   700  	return env, nil
   701  }
   702  
   703  // FaqErrorMsg returns an error message pointing to the FAQ.
   704  func FaqErrorMsg(anchor, msg string) string {
   705  	return fmt.Sprintf("%s; see https://gvisor.dev/faq#%s for more details", msg, anchor)
   706  }