github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/configs/validate/validator.go (about)

     1  package validate
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"strings"
     9  	"sync"
    10  
    11  	"github.com/opencontainers/runc/libcontainer/cgroups"
    12  	"github.com/opencontainers/runc/libcontainer/configs"
    13  	"github.com/opencontainers/runc/libcontainer/intelrdt"
    14  	"github.com/opencontainers/runtime-spec/specs-go"
    15  	selinux "github.com/opencontainers/selinux/go-selinux"
    16  	"github.com/sirupsen/logrus"
    17  	"golang.org/x/sys/unix"
    18  )
    19  
// check is a single validation step: it inspects the given config and
// returns a non-nil error when it finds a problem.
type check func(config *configs.Config) error
    21  
    22  func Validate(config *configs.Config) error {
    23  	checks := []check{
    24  		cgroupsCheck,
    25  		rootfs,
    26  		network,
    27  		uts,
    28  		security,
    29  		namespaces,
    30  		sysctl,
    31  		intelrdtCheck,
    32  		rootlessEUIDCheck,
    33  		mountsStrict,
    34  		scheduler,
    35  		ioPriority,
    36  	}
    37  	for _, c := range checks {
    38  		if err := c(config); err != nil {
    39  			return err
    40  		}
    41  	}
    42  	// Relaxed validation rules for backward compatibility
    43  	warns := []check{
    44  		mountsWarn,
    45  	}
    46  	for _, c := range warns {
    47  		if err := c(config); err != nil {
    48  			logrus.WithError(err).Warn("configuration")
    49  		}
    50  	}
    51  	return nil
    52  }
    53  
    54  // rootfs validates if the rootfs is an absolute path and is not a symlink
    55  // to the container's root filesystem.
    56  func rootfs(config *configs.Config) error {
    57  	if _, err := os.Stat(config.Rootfs); err != nil {
    58  		return fmt.Errorf("invalid rootfs: %w", err)
    59  	}
    60  	cleaned, err := filepath.Abs(config.Rootfs)
    61  	if err != nil {
    62  		return fmt.Errorf("invalid rootfs: %w", err)
    63  	}
    64  	if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil {
    65  		return fmt.Errorf("invalid rootfs: %w", err)
    66  	}
    67  	if filepath.Clean(config.Rootfs) != cleaned {
    68  		return errors.New("invalid rootfs: not an absolute path, or a symlink")
    69  	}
    70  	return nil
    71  }
    72  
    73  func network(config *configs.Config) error {
    74  	if !config.Namespaces.Contains(configs.NEWNET) {
    75  		if len(config.Networks) > 0 || len(config.Routes) > 0 {
    76  			return errors.New("unable to apply network settings without a private NET namespace")
    77  		}
    78  	}
    79  	return nil
    80  }
    81  
    82  func uts(config *configs.Config) error {
    83  	if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) {
    84  		return errors.New("unable to set hostname without a private UTS namespace")
    85  	}
    86  	if config.Domainname != "" && !config.Namespaces.Contains(configs.NEWUTS) {
    87  		return errors.New("unable to set domainname without a private UTS namespace")
    88  	}
    89  	return nil
    90  }
    91  
    92  func security(config *configs.Config) error {
    93  	// restrict sys without mount namespace
    94  	if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) &&
    95  		!config.Namespaces.Contains(configs.NEWNS) {
    96  		return errors.New("unable to restrict sys entries without a private MNT namespace")
    97  	}
    98  	if config.ProcessLabel != "" && !selinux.GetEnabled() {
    99  		return errors.New("selinux label is specified in config, but selinux is disabled or not supported")
   100  	}
   101  
   102  	return nil
   103  }
   104  
   105  func namespaces(config *configs.Config) error {
   106  	if config.Namespaces.Contains(configs.NEWUSER) {
   107  		if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
   108  			return errors.New("user namespaces aren't enabled in the kernel")
   109  		}
   110  		hasPath := config.Namespaces.PathOf(configs.NEWUSER) != ""
   111  		hasMappings := config.UIDMappings != nil || config.GIDMappings != nil
   112  		if !hasPath && !hasMappings {
   113  			return errors.New("user namespaces enabled, but no namespace path to join nor mappings to apply specified")
   114  		}
   115  		// The hasPath && hasMappings validation case is handled in specconv --
   116  		// we cache the mappings in Config during specconv in the hasPath case,
   117  		// so we cannot do that validation here.
   118  	} else {
   119  		if config.UIDMappings != nil || config.GIDMappings != nil {
   120  			return errors.New("user namespace mappings specified, but user namespace isn't enabled in the config")
   121  		}
   122  	}
   123  
   124  	if config.Namespaces.Contains(configs.NEWCGROUP) {
   125  		if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) {
   126  			return errors.New("cgroup namespaces aren't enabled in the kernel")
   127  		}
   128  	}
   129  
   130  	if config.Namespaces.Contains(configs.NEWTIME) {
   131  		if _, err := os.Stat("/proc/self/timens_offsets"); os.IsNotExist(err) {
   132  			return errors.New("time namespaces aren't enabled in the kernel")
   133  		}
   134  		hasPath := config.Namespaces.PathOf(configs.NEWTIME) != ""
   135  		hasOffsets := config.TimeOffsets != nil
   136  		if hasPath && hasOffsets {
   137  			return errors.New("time namespace enabled, but both namespace path and time offsets specified -- you may only provide one")
   138  		}
   139  	} else {
   140  		if config.TimeOffsets != nil {
   141  			return errors.New("time namespace offsets specified, but time namespace isn't enabled in the config")
   142  		}
   143  	}
   144  
   145  	return nil
   146  }
   147  
   148  // convertSysctlVariableToDotsSeparator can return sysctl variables in dots separator format.
   149  // The '/' separator is also accepted in place of a '.'.
   150  // Convert the sysctl variables to dots separator format for validation.
   151  // More info: sysctl(8), sysctl.d(5).
   152  //
   153  // For example:
   154  // Input sysctl variable "net/ipv4/conf/eno2.100.rp_filter"
   155  // will return the converted value "net.ipv4.conf.eno2/100.rp_filter"
   156  func convertSysctlVariableToDotsSeparator(val string) string {
   157  	if val == "" {
   158  		return val
   159  	}
   160  	firstSepIndex := strings.IndexAny(val, "./")
   161  	if firstSepIndex == -1 || val[firstSepIndex] == '.' {
   162  		return val
   163  	}
   164  
   165  	f := func(r rune) rune {
   166  		switch r {
   167  		case '.':
   168  			return '/'
   169  		case '/':
   170  			return '.'
   171  		}
   172  		return r
   173  	}
   174  	return strings.Map(f, val)
   175  }
   176  
   177  // sysctl validates that the specified sysctl keys are valid or not.
   178  // /proc/sys isn't completely namespaced and depending on which namespaces
   179  // are specified, a subset of sysctls are permitted.
   180  func sysctl(config *configs.Config) error {
   181  	validSysctlMap := map[string]bool{
   182  		"kernel.msgmax":          true,
   183  		"kernel.msgmnb":          true,
   184  		"kernel.msgmni":          true,
   185  		"kernel.sem":             true,
   186  		"kernel.shmall":          true,
   187  		"kernel.shmmax":          true,
   188  		"kernel.shmmni":          true,
   189  		"kernel.shm_rmid_forced": true,
   190  	}
   191  
   192  	var (
   193  		netOnce    sync.Once
   194  		hostnet    bool
   195  		hostnetErr error
   196  	)
   197  
   198  	for s := range config.Sysctl {
   199  		s := convertSysctlVariableToDotsSeparator(s)
   200  		if validSysctlMap[s] || strings.HasPrefix(s, "fs.mqueue.") {
   201  			if config.Namespaces.Contains(configs.NEWIPC) {
   202  				continue
   203  			} else {
   204  				return fmt.Errorf("sysctl %q is not allowed in the hosts ipc namespace", s)
   205  			}
   206  		}
   207  		if strings.HasPrefix(s, "net.") {
   208  			// Is container using host netns?
   209  			// Here "host" means "current", not "initial".
   210  			netOnce.Do(func() {
   211  				if !config.Namespaces.Contains(configs.NEWNET) {
   212  					hostnet = true
   213  					return
   214  				}
   215  				path := config.Namespaces.PathOf(configs.NEWNET)
   216  				if path == "" {
   217  					// own netns, so hostnet = false
   218  					return
   219  				}
   220  				hostnet, hostnetErr = isHostNetNS(path)
   221  			})
   222  			if hostnetErr != nil {
   223  				return fmt.Errorf("invalid netns path: %w", hostnetErr)
   224  			}
   225  			if hostnet {
   226  				return fmt.Errorf("sysctl %q not allowed in host network namespace", s)
   227  			}
   228  			continue
   229  		}
   230  		if config.Namespaces.Contains(configs.NEWUTS) {
   231  			switch s {
   232  			case "kernel.domainname":
   233  				// This is namespaced and there's no explicit OCI field for it.
   234  				continue
   235  			case "kernel.hostname":
   236  				// This is namespaced but there's a conflicting (dedicated) OCI field for it.
   237  				return fmt.Errorf("sysctl %q is not allowed as it conflicts with the OCI %q field", s, "hostname")
   238  			}
   239  		}
   240  		return fmt.Errorf("sysctl %q is not in a separate kernel namespace", s)
   241  	}
   242  
   243  	return nil
   244  }
   245  
   246  func intelrdtCheck(config *configs.Config) error {
   247  	if config.IntelRdt != nil {
   248  		if config.IntelRdt.ClosID == "." || config.IntelRdt.ClosID == ".." || strings.Contains(config.IntelRdt.ClosID, "/") {
   249  			return fmt.Errorf("invalid intelRdt.ClosID %q", config.IntelRdt.ClosID)
   250  		}
   251  
   252  		if !intelrdt.IsCATEnabled() && config.IntelRdt.L3CacheSchema != "" {
   253  			return errors.New("intelRdt.l3CacheSchema is specified in config, but Intel RDT/CAT is not enabled")
   254  		}
   255  		if !intelrdt.IsMBAEnabled() && config.IntelRdt.MemBwSchema != "" {
   256  			return errors.New("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled")
   257  		}
   258  	}
   259  
   260  	return nil
   261  }
   262  
   263  func cgroupsCheck(config *configs.Config) error {
   264  	c := config.Cgroups
   265  	if c == nil {
   266  		return nil
   267  	}
   268  
   269  	if (c.Name != "" || c.Parent != "") && c.Path != "" {
   270  		return fmt.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c)
   271  	}
   272  
   273  	r := c.Resources
   274  	if r == nil {
   275  		return nil
   276  	}
   277  
   278  	if !cgroups.IsCgroup2UnifiedMode() && r.Unified != nil {
   279  		return cgroups.ErrV1NoUnified
   280  	}
   281  
   282  	if cgroups.IsCgroup2UnifiedMode() {
   283  		_, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory)
   284  		if err != nil {
   285  			return err
   286  		}
   287  	}
   288  
   289  	return nil
   290  }
   291  
   292  func checkBindOptions(m *configs.Mount) error {
   293  	if !m.IsBind() {
   294  		return nil
   295  	}
   296  	// We must reject bind-mounts that also have filesystem-specific mount
   297  	// options, because the kernel will completely ignore these flags and we
   298  	// cannot set them per-mountpoint.
   299  	//
   300  	// It should be noted that (due to how the kernel caches superblocks), data
   301  	// options could also silently ignored for other filesystems even when
   302  	// doing a fresh mount, but there is no real way to avoid this (and it
   303  	// matches how everything else works). There have been proposals to make it
   304  	// possible for userspace to detect this caching, but this wouldn't help
   305  	// runc because the behaviour wouldn't even be desirable for most users.
   306  	if m.Data != "" {
   307  		return errors.New("bind mounts cannot have any filesystem-specific options applied")
   308  	}
   309  	return nil
   310  }
   311  
   312  func checkIDMapMounts(config *configs.Config, m *configs.Mount) error {
   313  	// Make sure MOUNT_ATTR_IDMAP is not set on any of our mounts. This
   314  	// attribute is handled differently to all other attributes (through
   315  	// m.IDMapping), so make sure we never store it in the actual config. This
   316  	// really shouldn't ever happen.
   317  	if m.RecAttr != nil && (m.RecAttr.Attr_set|m.RecAttr.Attr_clr)&unix.MOUNT_ATTR_IDMAP != 0 {
   318  		return errors.New("mount configuration cannot contain recAttr for MOUNT_ATTR_IDMAP")
   319  	}
   320  	if !m.IsIDMapped() {
   321  		return nil
   322  	}
   323  	if !m.IsBind() {
   324  		return errors.New("id-mapped mounts are only supported for bind-mounts")
   325  	}
   326  	if config.RootlessEUID {
   327  		return errors.New("id-mapped mounts are not supported for rootless containers")
   328  	}
   329  	if m.IDMapping.UserNSPath == "" {
   330  		if len(m.IDMapping.UIDMappings) == 0 || len(m.IDMapping.GIDMappings) == 0 {
   331  			return errors.New("id-mapped mounts must have both uid and gid mappings specified")
   332  		}
   333  	} else {
   334  		if m.IDMapping.UIDMappings != nil || m.IDMapping.GIDMappings != nil {
   335  			// should never happen
   336  			return errors.New("[internal error] id-mapped mounts cannot have both userns_path and uid and gid mappings specified")
   337  		}
   338  	}
   339  	return nil
   340  }
   341  
   342  func mountsWarn(config *configs.Config) error {
   343  	for _, m := range config.Mounts {
   344  		if !filepath.IsAbs(m.Destination) {
   345  			return fmt.Errorf("mount %+v: relative destination path is **deprecated**, using it as relative to /", m)
   346  		}
   347  	}
   348  	return nil
   349  }
   350  
   351  func mountsStrict(config *configs.Config) error {
   352  	for _, m := range config.Mounts {
   353  		if err := checkBindOptions(m); err != nil {
   354  			return fmt.Errorf("invalid mount %+v: %w", m, err)
   355  		}
   356  		if err := checkIDMapMounts(config, m); err != nil {
   357  			return fmt.Errorf("invalid mount %+v: %w", m, err)
   358  		}
   359  	}
   360  	return nil
   361  }
   362  
   363  func isHostNetNS(path string) (bool, error) {
   364  	const currentProcessNetns = "/proc/self/ns/net"
   365  
   366  	var st1, st2 unix.Stat_t
   367  
   368  	if err := unix.Stat(currentProcessNetns, &st1); err != nil {
   369  		return false, &os.PathError{Op: "stat", Path: currentProcessNetns, Err: err}
   370  	}
   371  	if err := unix.Stat(path, &st2); err != nil {
   372  		return false, &os.PathError{Op: "stat", Path: path, Err: err}
   373  	}
   374  
   375  	return (st1.Dev == st2.Dev) && (st1.Ino == st2.Ino), nil
   376  }
   377  
   378  // scheduler is to validate scheduler configs according to https://man7.org/linux/man-pages/man2/sched_setattr.2.html
   379  func scheduler(config *configs.Config) error {
   380  	s := config.Scheduler
   381  	if s == nil {
   382  		return nil
   383  	}
   384  	if s.Policy == "" {
   385  		return errors.New("scheduler policy is required")
   386  	}
   387  	if s.Policy == specs.SchedOther || s.Policy == specs.SchedBatch {
   388  		if s.Nice < -20 || s.Nice > 19 {
   389  			return fmt.Errorf("invalid scheduler.nice: %d when scheduler.policy is %s", s.Nice, string(s.Policy))
   390  		}
   391  	}
   392  	if s.Priority != 0 && (s.Policy != specs.SchedFIFO && s.Policy != specs.SchedRR) {
   393  		return errors.New("scheduler.priority can only be specified for SchedFIFO or SchedRR policy")
   394  	}
   395  	if s.Policy != specs.SchedDeadline && (s.Runtime != 0 || s.Deadline != 0 || s.Period != 0) {
   396  		return errors.New("scheduler runtime/deadline/period can only be specified for SchedDeadline policy")
   397  	}
   398  	return nil
   399  }
   400  
   401  func ioPriority(config *configs.Config) error {
   402  	if config.IOPriority == nil {
   403  		return nil
   404  	}
   405  	priority := config.IOPriority.Priority
   406  	if priority < 0 || priority > 7 {
   407  		return fmt.Errorf("invalid ioPriority.Priority: %d", priority)
   408  	}
   409  	return nil
   410  }