github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/configs/validate/validator.go (about) 1 package validate 2 3 import ( 4 "errors" 5 "fmt" 6 "os" 7 "path/filepath" 8 "strings" 9 "sync" 10 11 "github.com/opencontainers/runc/libcontainer/cgroups" 12 "github.com/opencontainers/runc/libcontainer/configs" 13 "github.com/opencontainers/runc/libcontainer/intelrdt" 14 "github.com/opencontainers/runtime-spec/specs-go" 15 selinux "github.com/opencontainers/selinux/go-selinux" 16 "github.com/sirupsen/logrus" 17 "golang.org/x/sys/unix" 18 ) 19 20 type check func(config *configs.Config) error 21 22 func Validate(config *configs.Config) error { 23 checks := []check{ 24 cgroupsCheck, 25 rootfs, 26 network, 27 uts, 28 security, 29 namespaces, 30 sysctl, 31 intelrdtCheck, 32 rootlessEUIDCheck, 33 mountsStrict, 34 scheduler, 35 ioPriority, 36 } 37 for _, c := range checks { 38 if err := c(config); err != nil { 39 return err 40 } 41 } 42 // Relaxed validation rules for backward compatibility 43 warns := []check{ 44 mountsWarn, 45 } 46 for _, c := range warns { 47 if err := c(config); err != nil { 48 logrus.WithError(err).Warn("configuration") 49 } 50 } 51 return nil 52 } 53 54 // rootfs validates if the rootfs is an absolute path and is not a symlink 55 // to the container's root filesystem. 56 func rootfs(config *configs.Config) error { 57 if _, err := os.Stat(config.Rootfs); err != nil { 58 return fmt.Errorf("invalid rootfs: %w", err) 59 } 60 cleaned, err := filepath.Abs(config.Rootfs) 61 if err != nil { 62 return fmt.Errorf("invalid rootfs: %w", err) 63 } 64 if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil { 65 return fmt.Errorf("invalid rootfs: %w", err) 66 } 67 if filepath.Clean(config.Rootfs) != cleaned { 68 return errors.New("invalid rootfs: not an absolute path, or a symlink") 69 } 70 return nil 71 } 72 73 func network(config *configs.Config) error { 74 if !config.Namespaces.Contains(configs.NEWNET) { 75 if len(config.Networks) > 0 || len(config.Routes) > 0 { 76 return errors.New("unable to apply network settings without a private NET namespace") 77 } 78 } 79 return nil 80 } 81 82 func uts(config *configs.Config) error { 83 if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) { 84 return errors.New("unable to set hostname without a private UTS namespace") 85 } 86 if config.Domainname != "" && !config.Namespaces.Contains(configs.NEWUTS) { 87 return errors.New("unable to set domainname without a private UTS namespace") 88 } 89 return nil 90 } 91 92 func security(config *configs.Config) error { 93 // restrict sys without mount namespace 94 if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) && 95 !config.Namespaces.Contains(configs.NEWNS) { 96 return errors.New("unable to restrict sys entries without a private MNT namespace") 97 } 98 if config.ProcessLabel != "" && !selinux.GetEnabled() { 99 return errors.New("selinux label is specified in config, but selinux is disabled or not supported") 100 } 101 102 return nil 103 } 104 105 func namespaces(config *configs.Config) error { 106 if config.Namespaces.Contains(configs.NEWUSER) { 107 if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { 108 return errors.New("user namespaces aren't enabled in the kernel") 109 } 110 hasPath := config.Namespaces.PathOf(configs.NEWUSER) != "" 111 hasMappings := config.UIDMappings != nil || config.GIDMappings != nil 112 if !hasPath && !hasMappings { 113 return errors.New("user namespaces enabled, but no namespace path to join nor mappings to apply specified") 114 } 115 // The hasPath && hasMappings validation case is handled in specconv -- 116 // we cache the mappings in Config during specconv in the hasPath case, 117 // so we cannot do that validation here. 118 } else { 119 if config.UIDMappings != nil || config.GIDMappings != nil { 120 return errors.New("user namespace mappings specified, but user namespace isn't enabled in the config") 121 } 122 } 123 124 if config.Namespaces.Contains(configs.NEWCGROUP) { 125 if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) { 126 return errors.New("cgroup namespaces aren't enabled in the kernel") 127 } 128 } 129 130 if config.Namespaces.Contains(configs.NEWTIME) { 131 if _, err := os.Stat("/proc/self/timens_offsets"); os.IsNotExist(err) { 132 return errors.New("time namespaces aren't enabled in the kernel") 133 } 134 hasPath := config.Namespaces.PathOf(configs.NEWTIME) != "" 135 hasOffsets := config.TimeOffsets != nil 136 if hasPath && hasOffsets { 137 return errors.New("time namespace enabled, but both namespace path and time offsets specified -- you may only provide one") 138 } 139 } else { 140 if config.TimeOffsets != nil { 141 return errors.New("time namespace offsets specified, but time namespace isn't enabled in the config") 142 } 143 } 144 145 return nil 146 } 147 148 // convertSysctlVariableToDotsSeparator can return sysctl variables in dots separator format. 149 // The '/' separator is also accepted in place of a '.'. 150 // Convert the sysctl variables to dots separator format for validation. 151 // More info: sysctl(8), sysctl.d(5). 152 // 153 // For example: 154 // Input sysctl variable "net/ipv4/conf/eno2.100.rp_filter" 155 // will return the converted value "net.ipv4.conf.eno2/100.rp_filter" 156 func convertSysctlVariableToDotsSeparator(val string) string { 157 if val == "" { 158 return val 159 } 160 firstSepIndex := strings.IndexAny(val, "./") 161 if firstSepIndex == -1 || val[firstSepIndex] == '.' { 162 return val 163 } 164 165 f := func(r rune) rune { 166 switch r { 167 case '.': 168 return '/' 169 case '/': 170 return '.' 171 } 172 return r 173 } 174 return strings.Map(f, val) 175 } 176 177 // sysctl validates that the specified sysctl keys are valid or not. 178 // /proc/sys isn't completely namespaced and depending on which namespaces 179 // are specified, a subset of sysctls are permitted. 180 func sysctl(config *configs.Config) error { 181 validSysctlMap := map[string]bool{ 182 "kernel.msgmax": true, 183 "kernel.msgmnb": true, 184 "kernel.msgmni": true, 185 "kernel.sem": true, 186 "kernel.shmall": true, 187 "kernel.shmmax": true, 188 "kernel.shmmni": true, 189 "kernel.shm_rmid_forced": true, 190 } 191 192 var ( 193 netOnce sync.Once 194 hostnet bool 195 hostnetErr error 196 ) 197 198 for s := range config.Sysctl { 199 s := convertSysctlVariableToDotsSeparator(s) 200 if validSysctlMap[s] || strings.HasPrefix(s, "fs.mqueue.") { 201 if config.Namespaces.Contains(configs.NEWIPC) { 202 continue 203 } else { 204 return fmt.Errorf("sysctl %q is not allowed in the hosts ipc namespace", s) 205 } 206 } 207 if strings.HasPrefix(s, "net.") { 208 // Is container using host netns? 209 // Here "host" means "current", not "initial". 210 netOnce.Do(func() { 211 if !config.Namespaces.Contains(configs.NEWNET) { 212 hostnet = true 213 return 214 } 215 path := config.Namespaces.PathOf(configs.NEWNET) 216 if path == "" { 217 // own netns, so hostnet = false 218 return 219 } 220 hostnet, hostnetErr = isHostNetNS(path) 221 }) 222 if hostnetErr != nil { 223 return fmt.Errorf("invalid netns path: %w", hostnetErr) 224 } 225 if hostnet { 226 return fmt.Errorf("sysctl %q not allowed in host network namespace", s) 227 } 228 continue 229 } 230 if config.Namespaces.Contains(configs.NEWUTS) { 231 switch s { 232 case "kernel.domainname": 233 // This is namespaced and there's no explicit OCI field for it. 234 continue 235 case "kernel.hostname": 236 // This is namespaced but there's a conflicting (dedicated) OCI field for it. 237 return fmt.Errorf("sysctl %q is not allowed as it conflicts with the OCI %q field", s, "hostname") 238 } 239 } 240 return fmt.Errorf("sysctl %q is not in a separate kernel namespace", s) 241 } 242 243 return nil 244 } 245 246 func intelrdtCheck(config *configs.Config) error { 247 if config.IntelRdt != nil { 248 if config.IntelRdt.ClosID == "." || config.IntelRdt.ClosID == ".." || strings.Contains(config.IntelRdt.ClosID, "/") { 249 return fmt.Errorf("invalid intelRdt.ClosID %q", config.IntelRdt.ClosID) 250 } 251 252 if !intelrdt.IsCATEnabled() && config.IntelRdt.L3CacheSchema != "" { 253 return errors.New("intelRdt.l3CacheSchema is specified in config, but Intel RDT/CAT is not enabled") 254 } 255 if !intelrdt.IsMBAEnabled() && config.IntelRdt.MemBwSchema != "" { 256 return errors.New("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled") 257 } 258 } 259 260 return nil 261 } 262 263 func cgroupsCheck(config *configs.Config) error { 264 c := config.Cgroups 265 if c == nil { 266 return nil 267 } 268 269 if (c.Name != "" || c.Parent != "") && c.Path != "" { 270 return fmt.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c) 271 } 272 273 r := c.Resources 274 if r == nil { 275 return nil 276 } 277 278 if !cgroups.IsCgroup2UnifiedMode() && r.Unified != nil { 279 return cgroups.ErrV1NoUnified 280 } 281 282 if cgroups.IsCgroup2UnifiedMode() { 283 _, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory) 284 if err != nil { 285 return err 286 } 287 } 288 289 return nil 290 } 291 292 func checkBindOptions(m *configs.Mount) error { 293 if !m.IsBind() { 294 return nil 295 } 296 // We must reject bind-mounts that also have filesystem-specific mount 297 // options, because the kernel will completely ignore these flags and we 298 // cannot set them per-mountpoint. 299 // 300 // It should be noted that (due to how the kernel caches superblocks), data 301 // options could also silently ignored for other filesystems even when 302 // doing a fresh mount, but there is no real way to avoid this (and it 303 // matches how everything else works). There have been proposals to make it 304 // possible for userspace to detect this caching, but this wouldn't help 305 // runc because the behaviour wouldn't even be desirable for most users. 306 if m.Data != "" { 307 return errors.New("bind mounts cannot have any filesystem-specific options applied") 308 } 309 return nil 310 } 311 312 func checkIDMapMounts(config *configs.Config, m *configs.Mount) error { 313 // Make sure MOUNT_ATTR_IDMAP is not set on any of our mounts. This 314 // attribute is handled differently to all other attributes (through 315 // m.IDMapping), so make sure we never store it in the actual config. This 316 // really shouldn't ever happen. 317 if m.RecAttr != nil && (m.RecAttr.Attr_set|m.RecAttr.Attr_clr)&unix.MOUNT_ATTR_IDMAP != 0 { 318 return errors.New("mount configuration cannot contain recAttr for MOUNT_ATTR_IDMAP") 319 } 320 if !m.IsIDMapped() { 321 return nil 322 } 323 if !m.IsBind() { 324 return errors.New("id-mapped mounts are only supported for bind-mounts") 325 } 326 if config.RootlessEUID { 327 return errors.New("id-mapped mounts are not supported for rootless containers") 328 } 329 if m.IDMapping.UserNSPath == "" { 330 if len(m.IDMapping.UIDMappings) == 0 || len(m.IDMapping.GIDMappings) == 0 { 331 return errors.New("id-mapped mounts must have both uid and gid mappings specified") 332 } 333 } else { 334 if m.IDMapping.UIDMappings != nil || m.IDMapping.GIDMappings != nil { 335 // should never happen 336 return errors.New("[internal error] id-mapped mounts cannot have both userns_path and uid and gid mappings specified") 337 } 338 } 339 return nil 340 } 341 342 func mountsWarn(config *configs.Config) error { 343 for _, m := range config.Mounts { 344 if !filepath.IsAbs(m.Destination) { 345 return fmt.Errorf("mount %+v: relative destination path is **deprecated**, using it as relative to /", m) 346 } 347 } 348 return nil 349 } 350 351 func mountsStrict(config *configs.Config) error { 352 for _, m := range config.Mounts { 353 if err := checkBindOptions(m); err != nil { 354 return fmt.Errorf("invalid mount %+v: %w", m, err) 355 } 356 if err := checkIDMapMounts(config, m); err != nil { 357 return fmt.Errorf("invalid mount %+v: %w", m, err) 358 } 359 } 360 return nil 361 } 362 363 func isHostNetNS(path string) (bool, error) { 364 const currentProcessNetns = "/proc/self/ns/net" 365 366 var st1, st2 unix.Stat_t 367 368 if err := unix.Stat(currentProcessNetns, &st1); err != nil { 369 return false, &os.PathError{Op: "stat", Path: currentProcessNetns, Err: err} 370 } 371 if err := unix.Stat(path, &st2); err != nil { 372 return false, &os.PathError{Op: "stat", Path: path, Err: err} 373 } 374 375 return (st1.Dev == st2.Dev) && (st1.Ino == st2.Ino), nil 376 } 377 378 // scheduler is to validate scheduler configs according to https://man7.org/linux/man-pages/man2/sched_setattr.2.html 379 func scheduler(config *configs.Config) error { 380 s := config.Scheduler 381 if s == nil { 382 return nil 383 } 384 if s.Policy == "" { 385 return errors.New("scheduler policy is required") 386 } 387 if s.Policy == specs.SchedOther || s.Policy == specs.SchedBatch { 388 if s.Nice < -20 || s.Nice > 19 { 389 return fmt.Errorf("invalid scheduler.nice: %d when scheduler.policy is %s", s.Nice, string(s.Policy)) 390 } 391 } 392 if s.Priority != 0 && (s.Policy != specs.SchedFIFO && s.Policy != specs.SchedRR) { 393 return errors.New("scheduler.priority can only be specified for SchedFIFO or SchedRR policy") 394 } 395 if s.Policy != specs.SchedDeadline && (s.Runtime != 0 || s.Deadline != 0 || s.Period != 0) { 396 return errors.New("scheduler runtime/deadline/period can only be specified for SchedDeadline policy") 397 } 398 return nil 399 } 400 401 func ioPriority(config *configs.Config) error { 402 if config.IOPriority == nil { 403 return nil 404 } 405 priority := config.IOPriority.Priority 406 if priority < 0 || priority > 7 { 407 return fmt.Errorf("invalid ioPriority.Priority: %d", priority) 408 } 409 return nil 410 }