gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/runsc/config/config.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package config provides basic infrastructure to set configuration settings 16 // for runsc. The configuration is set by flags to the command line. They can 17 // also propagate to a different process using the same flags. 18 package config 19 20 import ( 21 "fmt" 22 "path/filepath" 23 "reflect" 24 "runtime" 25 "strconv" 26 "strings" 27 28 "gvisor.dev/gvisor/pkg/log" 29 "gvisor.dev/gvisor/pkg/refs" 30 "gvisor.dev/gvisor/pkg/sentry/watchdog" 31 "gvisor.dev/gvisor/runsc/flag" 32 "gvisor.dev/gvisor/runsc/version" 33 ) 34 35 // Config holds configuration that is not part of the runtime spec. 36 // 37 // Follow these steps to add a new flag: 38 // 1. Create a new field in Config. 39 // 2. Add a field tag with the flag name 40 // 3. Register a new flag in flags.go, with same name and add a description 41 // 4. Add any necessary validation into validate() 42 // 5. If adding an enum, follow the same pattern as FileAccessType 43 // 6. Evaluate if the flag can be changed with OCI annotations. See 44 // overrideAllowlist for more details 45 type Config struct { 46 // RootDir is the runtime root directory. 47 RootDir string `flag:"root"` 48 49 // Traceback changes the Go runtime's traceback level. 
50 Traceback string `flag:"traceback"` 51 52 // Debug indicates that debug logging should be enabled. 53 Debug bool `flag:"debug"` 54 55 // LogFilename is the filename to log to, if not empty. 56 LogFilename string `flag:"log"` 57 58 // LogFormat is the log format. 59 LogFormat string `flag:"log-format"` 60 61 // DebugLog is the path to log debug information to, if not empty. 62 // If specified together with `DebugToUserLog`, debug logs are emitted 63 // to both. 64 DebugLog string `flag:"debug-log"` 65 66 // DebugToUserLog indicates that Sentry debug logs should be emitted 67 // to user-visible logs. 68 // If specified together with `DebugLog`, debug logs are emitted 69 // to both. 70 DebugToUserLog bool `flag:"debug-to-user-log"` 71 72 // DebugCommand is a comma-separated list of commands to be debugged if 73 // --debug-log is also set. Empty means debug all. "!" negates the expression. 74 // E.g. "create,start" or "!boot,events". 75 DebugCommand string `flag:"debug-command"` 76 77 // PanicLog is the path to log GO's runtime messages, if not empty. 78 PanicLog string `flag:"panic-log"` 79 80 // CoverageReport is the path to write Go coverage information, if not empty. 81 CoverageReport string `flag:"coverage-report"` 82 83 // DebugLogFormat is the log format for debug. 84 DebugLogFormat string `flag:"debug-log-format"` 85 86 // FileAccess indicates how the root filesystem is accessed. 87 FileAccess FileAccessType `flag:"file-access"` 88 89 // FileAccessMounts indicates how non-root volumes are accessed. 90 FileAccessMounts FileAccessType `flag:"file-access-mounts"` 91 92 // Overlay is whether to wrap all mounts in an overlay. The upper tmpfs layer 93 // will be backed by application memory. 94 Overlay bool `flag:"overlay"` 95 96 // Overlay2 holds configuration about wrapping mounts in overlayfs. 97 // DO NOT call it directly, use GetOverlay2() instead. 98 Overlay2 Overlay2 `flag:"overlay2"` 99 100 // FSGoferHostUDS is deprecated: use host-uds=all. 
101 FSGoferHostUDS bool `flag:"fsgofer-host-uds"` 102 103 // HostUDS controls permission to access host Unix-domain sockets. 104 // DO NOT call it directly, use GetHostUDS() instead. 105 HostUDS HostUDS `flag:"host-uds"` 106 107 // HostFifo controls permission to access host FIFO (or named pipes). 108 HostFifo HostFifo `flag:"host-fifo"` 109 110 // Network indicates what type of network to use. 111 Network NetworkType `flag:"network"` 112 113 // EnableRaw indicates whether raw sockets should be enabled. Raw 114 // sockets are disabled by stripping CAP_NET_RAW from the list of 115 // capabilities. 116 EnableRaw bool `flag:"net-raw"` 117 118 // AllowPacketEndpointWrite enables write operations on packet endpoints. 119 AllowPacketEndpointWrite bool `flag:"TESTONLY-allow-packet-endpoint-write"` 120 121 // HostGSO indicates that host segmentation offload is enabled. 122 HostGSO bool `flag:"gso"` 123 124 // GVisorGSO indicates that gVisor segmentation offload is enabled. The flag 125 // retains its old name of "software" GSO for API consistency. 126 GVisorGSO bool `flag:"software-gso"` 127 128 // GVisorGRO enables gVisor's generic receive offload. 129 GVisorGRO bool `flag:"gvisor-gro"` 130 131 // TXChecksumOffload indicates that TX Checksum Offload is enabled. 132 TXChecksumOffload bool `flag:"tx-checksum-offload"` 133 134 // RXChecksumOffload indicates that RX Checksum Offload is enabled. 135 RXChecksumOffload bool `flag:"rx-checksum-offload"` 136 137 // QDisc indicates the type of queuening discipline to use by default 138 // for non-loopback interfaces. 139 QDisc QueueingDiscipline `flag:"qdisc"` 140 141 // LogPackets indicates that all network packets should be logged. 142 LogPackets bool `flag:"log-packets"` 143 144 // PCAP is a file to which network packets should be logged in PCAP format. 145 PCAP string `flag:"pcap-log"` 146 147 // Platform is the platform to run on. 
148 Platform string `flag:"platform"` 149 150 // PlatformDevicePath is the path to the device file used by the platform. 151 // e.g. "/dev/kvm" for the KVM platform. 152 // If unset, a sane platform-specific default will be used. 153 PlatformDevicePath string `flag:"platform_device_path"` 154 155 // MetricServer, if set, indicates that metrics should be exported on this address. 156 // This may either be 1) "addr:port" to export metrics on a specific network interface address, 157 // 2) ":port" for exporting metrics on all addresses, or 3) an absolute path to a Unix Domain 158 // Socket. 159 // The substring "%ID%" will be replaced by the container ID, and "%RUNTIME_ROOT%" by the root. 160 // This flag must be specified *both* as part of the `runsc metric-server` arguments (so that the 161 // metric server knows which address to bind to), and as part of the `runsc create` arguments (as 162 // an indication that the container being created wishes that its metrics should be exported). 163 // The value of this flag must also match across the two command lines. 164 MetricServer string `flag:"metric-server"` 165 166 // ProfilingMetrics is a comma separated list of metric names which are 167 // going to be written to the ProfilingMetricsLog file from within the 168 // sentry in CSV format. ProfilingMetrics will be snapshotted at a rate 169 // specified by ProfilingMetricsRate. Requires ProfilingMetricsLog to be 170 // set. 171 ProfilingMetrics string `flag:"profiling-metrics"` 172 173 // ProfilingMetricsLog is the file name to use for ProfilingMetrics 174 // output. 175 ProfilingMetricsLog string `flag:"profiling-metrics-log"` 176 177 // ProfilingMetricsRate is the target rate (in microseconds) at which 178 // profiling metrics will be snapshotted. 179 ProfilingMetricsRate int `flag:"profiling-metrics-rate-us"` 180 181 // Strace indicates that strace should be enabled. 
182 Strace bool `flag:"strace"` 183 184 // StraceSyscalls is the set of syscalls to trace (comma-separated values). 185 // If StraceEnable is true and this string is empty, then all syscalls will 186 // be traced. 187 StraceSyscalls string `flag:"strace-syscalls"` 188 189 // StraceLogSize is the max size of data blobs to display. 190 StraceLogSize uint `flag:"strace-log-size"` 191 192 // StraceEvent indicates sending strace to events if true. Strace is 193 // sent to log if false. 194 StraceEvent bool `flag:"strace-event"` 195 196 // DisableSeccomp indicates whether seccomp syscall filters should be 197 // disabled. Pardon the double negation, but default to enabled is important. 198 DisableSeccomp bool 199 200 // EnableCoreTags indicates whether the Sentry process and children will be 201 // run in a core tagged process. This isolates the sentry from sharing 202 // physical cores with other core tagged processes. This is useful as a 203 // mitigation for hyperthreading side channel based attacks. Requires host 204 // linux kernel >= 5.14. 205 EnableCoreTags bool `flag:"enable-core-tags"` 206 207 // WatchdogAction sets what action the watchdog takes when triggered. 208 WatchdogAction watchdog.Action `flag:"watchdog-action"` 209 210 // PanicSignal registers signal handling that panics. Usually set to 211 // SIGUSR2(12) to troubleshoot hangs. -1 disables it. 212 PanicSignal int `flag:"panic-signal"` 213 214 // ProfileEnable is set to prepare the sandbox to be profiled. 215 ProfileEnable bool `flag:"profile"` 216 217 // ProfileBlock collects a block profile to the passed file for the 218 // duration of the container execution. Requires ProfileEnabled. 219 ProfileBlock string `flag:"profile-block"` 220 221 // ProfileCPU collects a CPU profile to the passed file for the 222 // duration of the container execution. Requires ProfileEnabled. 
223 ProfileCPU string `flag:"profile-cpu"` 224 225 // ProfileHeap collects a heap profile to the passed file for the 226 // duration of the container execution. Requires ProfileEnabled. 227 ProfileHeap string `flag:"profile-heap"` 228 229 // ProfileMutex collects a mutex profile to the passed file for the 230 // duration of the container execution. Requires ProfileEnabled. 231 ProfileMutex string `flag:"profile-mutex"` 232 233 // TraceFile collects a Go runtime execution trace to the passed file 234 // for the duration of the container execution. 235 TraceFile string `flag:"trace"` 236 237 // NumNetworkChannels controls the number of AF_PACKET sockets that map 238 // to the same underlying network device. This allows netstack to better 239 // scale for high throughput use cases. 240 NumNetworkChannels int `flag:"num-network-channels"` 241 242 // NetworkProcessorsPerChannel controls the number of goroutines used to 243 // handle packets on a single network channel. A higher number can help handle 244 // many simultaneous connections. If this is 0, runsc will divide GOMAXPROCS 245 // evenly among each network channel. 246 NetworkProcessorsPerChannel int `flag:"network-processors-per-channel"` 247 248 // Rootless allows the sandbox to be started with a user that is not root. 249 // Defense in depth measures are weaker in rootless mode. Specifically, the 250 // sandbox and Gofer process run as root inside a user namespace with root 251 // mapped to the caller's user. When using rootless, the container root path 252 // should not have a symlink. 253 Rootless bool `flag:"rootless"` 254 255 // AlsoLogToStderr allows to send log messages to stderr. 256 AlsoLogToStderr bool `flag:"alsologtostderr"` 257 258 // ReferenceLeakMode sets reference leak check mode 259 ReferenceLeak refs.LeakMode `flag:"ref-leak-mode"` 260 261 // CPUNumFromQuota sets CPU number count to available CPU quota, using 262 // least integer value greater than or equal to quota. 263 // 264 // E.g. 
0.2 CPU quota will result in 1, and 1.9 in 2. 265 CPUNumFromQuota bool `flag:"cpu-num-from-quota"` 266 267 // Allows overriding of flags in OCI annotations. 268 AllowFlagOverride bool `flag:"allow-flag-override"` 269 270 // Enables seccomp inside the sandbox. 271 OCISeccomp bool `flag:"oci-seccomp"` 272 273 // Don't configure cgroups. 274 IgnoreCgroups bool `flag:"ignore-cgroups"` 275 276 // Use systemd to configure cgroups. 277 SystemdCgroup bool `flag:"systemd-cgroup"` 278 279 // PodInitConfig is the path to configuration file with additional steps to 280 // take during pod creation. 281 PodInitConfig string `flag:"pod-init-config"` 282 283 // Use pools to manage buffer memory instead of heap. 284 BufferPooling bool `flag:"buffer-pooling"` 285 286 // XDP controls Whether and how to use XDP. 287 XDP XDP `flag:"EXPERIMENTAL-xdp"` 288 289 // AFXDPUseNeedWakeup determines whether XDP_USE_NEED_WAKEUP is set 290 // when using AF_XDP sockets. 291 AFXDPUseNeedWakeup bool `flag:"EXPERIMENTAL-xdp-need-wakeup"` 292 293 // FDLimit specifies a limit on the number of host file descriptors that can 294 // be open simultaneously by the sentry and gofer. It applies separately to 295 // each. 296 FDLimit int `flag:"fdlimit"` 297 298 // DCache sets the global dirent cache size. If negative, per-mount caches are 299 // used. 300 DCache int `flag:"dcache"` 301 302 // IOUring enables support for the IO_URING API calls to perform 303 // asynchronous I/O operations. 304 IOUring bool `flag:"iouring"` 305 306 // DirectFS sets up the sandbox to directly access/mutate the filesystem from 307 // the sentry. Sentry runs with escalated privileges. Gofer process still 308 // exists, but is mostly idle. Not supported in rootless mode. 309 DirectFS bool `flag:"directfs"` 310 311 // NVProxy enables support for Nvidia GPUs. 312 NVProxy bool `flag:"nvproxy"` 313 314 // NVProxyDocker is deprecated. Please use nvidia-container-runtime or 315 // `docker run --gpus` directly. 
For backward compatibility, this has the 316 // effect of injecting nvidia-container-runtime-hook as a prestart hook. 317 NVProxyDocker bool `flag:"nvproxy-docker"` 318 319 // NVProxyDriverVersion is the version of the NVIDIA driver ABI to use. 320 // If empty, it is autodetected from the installed NVIDIA driver. 321 // It can also be set to the special value "latest" to force the use of 322 // the latest supported NVIDIA driver ABI. 323 NVProxyDriverVersion string `flag:"nvproxy-driver-version"` 324 325 // TPUProxy enables support for TPUs. 326 TPUProxy bool `flag:"tpuproxy"` 327 328 // TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in 329 // tests. It allows runsc to start the sandbox process as the current 330 // user, and without chrooting the sandbox process. This can be 331 // necessary in test environments that have limited capabilities. When 332 // disabling chroot, the container root path should not have a symlink. 333 TestOnlyAllowRunAsCurrentUserWithoutChroot bool `flag:"TESTONLY-unsafe-nonroot"` 334 335 // TestOnlyTestNameEnv should only be used in tests. It looks up for the 336 // test name in the container environment variables and adds it to the debug 337 // log file name. This is done to help identify the log with the test when 338 // multiple tests are run in parallel, since there is no way to pass 339 // parameters to the runtime from docker. 340 TestOnlyTestNameEnv string `flag:"TESTONLY-test-name-env"` 341 342 // TestOnlyAFSSyscallPanic should only be used in tests. It enables the 343 // alternate behaviour for afs_syscall to trigger a Go-runtime panic upon being 344 // called. This is useful for tests exercising gVisor panic-reporting. 345 TestOnlyAFSSyscallPanic bool `flag:"TESTONLY-afs-syscall-panic"` 346 347 // explicitlySet contains whether a flag was explicitly set on the command-line from which this 348 // Config was constructed. Nil when the Config was not initialized from a FlagSet. 
349 explicitlySet map[string]struct{} 350 351 // ReproduceNAT, when true, tells runsc to scrape the host network 352 // namespace's NAT iptables and reproduce it inside the sandbox. 353 ReproduceNAT bool `flag:"reproduce-nat"` 354 355 // ReproduceNftables attempts to scrape nftables routing rules if 356 // present, and reproduce them in the sandbox. 357 ReproduceNftables bool `flag:"reproduce-nftables"` 358 359 // NetDisconnectOk indicates whether the link endpoint capability 360 // CapabilityDisconnectOk should be set. This allows open connections to be 361 // disconnected upon save. 362 NetDisconnectOk bool `flag:"net-disconnect-ok"` 363 364 // TestOnlyAutosaveImagePath if not empty enables auto save for syscall tests 365 // and stores the directory path to the saved state file. 366 TestOnlyAutosaveImagePath string `flag:"TESTONLY-autosave-image-path"` 367 368 // TestOnlyAutosaveResume indicates save resume for syscall tests. 369 TestOnlyAutosaveResume bool `flag:"TESTONLY-autosave-resume"` 370 } 371 372 func (c *Config) validate() error { 373 if c.Overlay && c.Overlay2.Enabled() { 374 // Deprecated flag was used together with flag that replaced it. 375 return fmt.Errorf("overlay flag has been replaced with overlay2 flag") 376 } 377 if overlay2 := c.GetOverlay2(); c.FileAccess == FileAccessShared && overlay2.Enabled() { 378 return fmt.Errorf("overlay flag is incompatible with shared file access for rootfs") 379 } 380 if c.NumNetworkChannels <= 0 { 381 return fmt.Errorf("num_network_channels must be > 0, got: %d", c.NumNetworkChannels) 382 } 383 // Require profile flags to explicitly opt-in to profiling with 384 // -profile rather than implying it since these options have security 385 // implications. 
386 if c.ProfileBlock != "" && !c.ProfileEnable { 387 return fmt.Errorf("profile-block flag requires enabling profiling with profile flag") 388 } 389 if c.ProfileCPU != "" && !c.ProfileEnable { 390 return fmt.Errorf("profile-cpu flag requires enabling profiling with profile flag") 391 } 392 if c.ProfileHeap != "" && !c.ProfileEnable { 393 return fmt.Errorf("profile-heap flag requires enabling profiling with profile flag") 394 } 395 if c.ProfileMutex != "" && !c.ProfileEnable { 396 return fmt.Errorf("profile-mutex flag requires enabling profiling with profile flag") 397 } 398 if c.FSGoferHostUDS && c.HostUDS != HostUDSNone { 399 // Deprecated flag was used together with flag that replaced it. 400 return fmt.Errorf("fsgofer-host-uds has been replaced with host-uds flag") 401 } 402 if len(c.ProfilingMetrics) > 0 && len(c.ProfilingMetricsLog) == 0 { 403 return fmt.Errorf("profiling-metrics flag requires defining a profiling-metrics-log for output") 404 } 405 return nil 406 } 407 408 // Log logs important aspects of the configuration to the given log function. 409 func (c *Config) Log() { 410 log.Infof("Platform: %v", c.Platform) 411 log.Infof("RootDir: %s", c.RootDir) 412 log.Infof("FileAccess: %v / Directfs: %t / Overlay: %v", c.FileAccess, c.DirectFS, c.GetOverlay2()) 413 log.Infof("Network: %v", c.Network) 414 if c.Debug || c.Strace { 415 log.Infof("Debug: %t. Strace: %t, max size: %d, syscalls: %s", c.Debug, c.Strace, c.StraceLogSize, c.StraceSyscalls) 416 } 417 if c.Debug { 418 obj := reflect.ValueOf(c).Elem() 419 st := obj.Type() 420 for i := 0; i < st.NumField(); i++ { 421 f := st.Field(i) 422 var val any 423 if strVal := obj.Field(i).String(); strVal == "" { 424 val = "(empty)" 425 } else if !f.IsExported() { 426 // Cannot convert to `interface{}` for non-exported fields, 427 // so just use `strVal`. 
428 val = fmt.Sprintf("%s (unexported)", strVal) 429 } else { 430 val = obj.Field(i).Interface() 431 } 432 if flagName, hasFlag := f.Tag.Lookup("flag"); hasFlag { 433 log.Debugf("Config.%s (--%s): %v", f.Name, flagName, val) 434 } else { 435 log.Debugf("Config.%s: %v", f.Name, val) 436 } 437 } 438 } 439 } 440 441 // GetHostUDS returns the FS gofer communication that is allowed, taking into 442 // consideration all flags what affect the result. 443 func (c *Config) GetHostUDS() HostUDS { 444 if c.FSGoferHostUDS { 445 if c.HostUDS != HostUDSNone { 446 panic(fmt.Sprintf("HostUDS cannot be set when --fsgofer-host-uds=true")) 447 } 448 // Using deprecated flag, honor it to avoid breaking users. 449 return HostUDSOpen 450 } 451 return c.HostUDS 452 } 453 454 // GetOverlay2 returns the overlay configuration, taking into consideration all 455 // flags that affect the result. 456 func (c *Config) GetOverlay2() Overlay2 { 457 if c.Overlay { 458 if c.Overlay2.Enabled() { 459 panic(fmt.Sprintf("Overlay2 cannot be set when --overlay=true")) 460 } 461 // Using a deprecated flag, honor it to avoid breaking users. 462 return Overlay2{rootMount: true, subMounts: true, medium: "memory"} 463 } 464 return c.Overlay2 465 } 466 467 // Bundle is a set of flag name-value pairs. 468 type Bundle map[string]string 469 470 // BundleName is a human-friendly name for a Bundle. 471 // It is used as part of an annotation to specify that the user wants to apply a Bundle. 472 type BundleName string 473 474 // Validate validates that given flag string values map to actual flags in runsc. 
475 func (b Bundle) Validate() error { 476 flagSet := flag.NewFlagSet("tmp", flag.ContinueOnError) 477 RegisterFlags(flagSet) 478 for key, val := range b { 479 flag := flagSet.Lookup(key) 480 if flag == nil { 481 return fmt.Errorf("unknown flag %q", key) 482 } 483 if err := flagSet.Set(key, val); err != nil { 484 return err 485 } 486 } 487 return nil 488 } 489 490 // MetricMetadataKeys is the set of keys of metric metadata labels 491 // as returned by `Config.MetricMetadata`. 492 var MetricMetadataKeys = []string{ 493 "version", 494 "platform", 495 "network", 496 "numcores", 497 "coretags", 498 "overlay", 499 "fsmode", 500 "cpuarch", 501 "go", 502 "experiment", 503 } 504 505 // MetricMetadata returns key-value pairs that are useful to include in metrics 506 // exported about the sandbox this config represents. 507 // It must return the same set of labels as listed in `MetricMetadataKeys`. 508 func (c *Config) MetricMetadata() map[string]string { 509 var fsMode = "goferfs" 510 if c.DirectFS { 511 fsMode = "directfs" 512 } 513 return map[string]string{ 514 "version": version.Version(), 515 "platform": c.Platform, 516 "network": c.Network.String(), 517 "numcores": strconv.Itoa(runtime.NumCPU()), 518 "coretags": strconv.FormatBool(c.EnableCoreTags), 519 "overlay": c.Overlay2.String(), 520 "fsmode": fsMode, 521 "cpuarch": runtime.GOARCH, 522 "go": runtime.Version(), 523 // The "experiment" label is currently unused, but may be used to contain 524 // extra information about e.g. an experiment that may be enabled. 525 "experiment": "", 526 } 527 } 528 529 // FileAccessType tells how the filesystem is accessed. 530 type FileAccessType int 531 532 const ( 533 // FileAccessExclusive gives the sandbox exclusive access over files and 534 // directories in the filesystem. No external modifications are permitted and 535 // can lead to undefined behavior. 536 // 537 // Exclusive filesystem access enables more aggressive caching and offers 538 // significantly better performance. 
This is the default mode for the root 539 // volume. 540 FileAccessExclusive FileAccessType = iota 541 542 // FileAccessShared is used for volumes that can have external changes. It 543 // requires revalidation on every filesystem access to detect external 544 // changes, and reduces the amount of caching that can be done. This is the 545 // default mode for non-root volumes. 546 FileAccessShared 547 ) 548 549 func fileAccessTypePtr(v FileAccessType) *FileAccessType { 550 return &v 551 } 552 553 // Set implements flag.Value. Set(String()) should be idempotent. 554 func (f *FileAccessType) Set(v string) error { 555 switch v { 556 case "shared": 557 *f = FileAccessShared 558 case "exclusive": 559 *f = FileAccessExclusive 560 default: 561 return fmt.Errorf("invalid file access type %q", v) 562 } 563 return nil 564 } 565 566 // Get implements flag.Value. 567 func (f *FileAccessType) Get() any { 568 return *f 569 } 570 571 // String implements flag.Value. 572 func (f FileAccessType) String() string { 573 switch f { 574 case FileAccessShared: 575 return "shared" 576 case FileAccessExclusive: 577 return "exclusive" 578 } 579 panic(fmt.Sprintf("Invalid file access type %d", f)) 580 } 581 582 // NetworkType tells which network stack to use. 583 type NetworkType int 584 585 const ( 586 // NetworkSandbox uses internal network stack, isolated from the host. 587 NetworkSandbox NetworkType = iota 588 589 // NetworkHost redirects network related syscalls to the host network. 590 NetworkHost 591 592 // NetworkNone sets up just loopback using netstack. 593 NetworkNone 594 ) 595 596 func networkTypePtr(v NetworkType) *NetworkType { 597 return &v 598 } 599 600 // Set implements flag.Value. Set(String()) should be idempotent. 
601 func (n *NetworkType) Set(v string) error { 602 switch v { 603 case "sandbox": 604 *n = NetworkSandbox 605 case "host": 606 *n = NetworkHost 607 case "none": 608 *n = NetworkNone 609 default: 610 return fmt.Errorf("invalid network type %q", v) 611 } 612 return nil 613 } 614 615 // Get implements flag.Value. 616 func (n *NetworkType) Get() any { 617 return *n 618 } 619 620 // String implements flag.Value. 621 func (n NetworkType) String() string { 622 switch n { 623 case NetworkSandbox: 624 return "sandbox" 625 case NetworkHost: 626 return "host" 627 case NetworkNone: 628 return "none" 629 } 630 panic(fmt.Sprintf("Invalid network type %d", n)) 631 } 632 633 // QueueingDiscipline is used to specify the kind of Queueing Discipline to 634 // apply for a give FDBasedLink. 635 type QueueingDiscipline int 636 637 const ( 638 // QDiscNone disables any queueing for the underlying FD. 639 QDiscNone QueueingDiscipline = iota 640 641 // QDiscFIFO applies a simple fifo based queue to the underlying FD. 642 QDiscFIFO 643 ) 644 645 func queueingDisciplinePtr(v QueueingDiscipline) *QueueingDiscipline { 646 return &v 647 } 648 649 // Set implements flag.Value. Set(String()) should be idempotent. 650 func (q *QueueingDiscipline) Set(v string) error { 651 switch v { 652 case "none": 653 *q = QDiscNone 654 case "fifo": 655 *q = QDiscFIFO 656 default: 657 return fmt.Errorf("invalid qdisc %q", v) 658 } 659 return nil 660 } 661 662 // Get implements flag.Value. 663 func (q *QueueingDiscipline) Get() any { 664 return *q 665 } 666 667 // String implements flag.Value. 
668 func (q QueueingDiscipline) String() string { 669 switch q { 670 case QDiscNone: 671 return "none" 672 case QDiscFIFO: 673 return "fifo" 674 } 675 panic(fmt.Sprintf("Invalid qdisc %d", q)) 676 } 677 678 func leakModePtr(v refs.LeakMode) *refs.LeakMode { 679 return &v 680 } 681 682 func watchdogActionPtr(v watchdog.Action) *watchdog.Action { 683 return &v 684 } 685 686 // HostUDS tells how much of the host UDS the file system has access to. 687 type HostUDS int 688 689 const ( 690 // HostUDSNone doesn't allows UDS from the host to be manipulated. 691 HostUDSNone HostUDS = 0x0 692 693 // HostUDSOpen allows UDS from the host to be opened, e.g. connect(2). 694 HostUDSOpen HostUDS = 0x1 695 696 // HostUDSCreate allows UDS from the host to be created, e.g. bind(2). 697 HostUDSCreate HostUDS = 0x2 698 699 // HostUDSAll allows all form of communication with the host through UDS. 700 HostUDSAll = HostUDSOpen | HostUDSCreate 701 ) 702 703 func hostUDSPtr(v HostUDS) *HostUDS { 704 return &v 705 } 706 707 // Set implements flag.Value. Set(String()) should be idempotent. 708 func (g *HostUDS) Set(v string) error { 709 switch v { 710 case "", "none": 711 *g = HostUDSNone 712 case "open": 713 *g = HostUDSOpen 714 case "create": 715 *g = HostUDSCreate 716 case "all": 717 *g = HostUDSAll 718 default: 719 return fmt.Errorf("invalid host UDS type %q", v) 720 } 721 return nil 722 } 723 724 // Get implements flag.Value. 725 func (g *HostUDS) Get() any { 726 return *g 727 } 728 729 // String implements flag.Value. 730 func (g HostUDS) String() string { 731 switch g { 732 case HostUDSNone: 733 return "none" 734 case HostUDSOpen: 735 return "open" 736 case HostUDSCreate: 737 return "create" 738 case HostUDSAll: 739 return "all" 740 default: 741 panic(fmt.Sprintf("Invalid host UDS type %d", g)) 742 } 743 } 744 745 // AllowOpen returns true if it can consume UDS from the host. 
746 func (g HostUDS) AllowOpen() bool { 747 return g&HostUDSOpen != 0 748 } 749 750 // AllowCreate returns true if it can create UDS in the host. 751 func (g HostUDS) AllowCreate() bool { 752 return g&HostUDSCreate != 0 753 } 754 755 // HostFifo tells how much of the host FIFO (or named pipes) the file system has 756 // access to. 757 type HostFifo int 758 759 const ( 760 // HostFifoNone doesn't allow FIFO from the host to be manipulated. 761 HostFifoNone HostFifo = 0x0 762 763 // HostFifoOpen allows FIFOs from the host to be opened. 764 HostFifoOpen HostFifo = 0x1 765 ) 766 767 func hostFifoPtr(v HostFifo) *HostFifo { 768 return &v 769 } 770 771 // Set implements flag.Value. Set(String()) should be idempotent. 772 func (g *HostFifo) Set(v string) error { 773 switch v { 774 case "", "none": 775 *g = HostFifoNone 776 case "open": 777 *g = HostFifoOpen 778 default: 779 return fmt.Errorf("invalid host fifo type %q", v) 780 } 781 return nil 782 } 783 784 // Get implements flag.Value. 785 func (g *HostFifo) Get() any { 786 return *g 787 } 788 789 // String implements flag.Value. 790 func (g HostFifo) String() string { 791 switch g { 792 case HostFifoNone: 793 return "none" 794 case HostFifoOpen: 795 return "open" 796 default: 797 panic(fmt.Sprintf("Invalid host fifo type %d", g)) 798 } 799 } 800 801 // AllowOpen returns true if it can consume FIFOs from the host. 802 func (g HostFifo) AllowOpen() bool { 803 return g&HostFifoOpen != 0 804 } 805 806 // OverlayMedium describes how overlay medium is configured. 807 type OverlayMedium string 808 809 const ( 810 // NoOverlay indicates that no overlay will be applied. 811 NoOverlay = OverlayMedium("") 812 813 // MemoryOverlay indicates that the overlay is backed by app memory. 814 MemoryOverlay = OverlayMedium("memory") 815 816 // SelfOverlay indicates that the overlaid mount is backed by itself. 
817 SelfOverlay = OverlayMedium("self") 818 819 // AnonOverlayPrefix is the prefix that users should specify in the 820 // config for the anonymous overlay. 821 AnonOverlayPrefix = "dir=" 822 ) 823 824 // String returns a human-readable string representing the overlay medium config. 825 func (m OverlayMedium) String() string { 826 return string(m) 827 } 828 829 // Set sets the value. Set(String()) should be idempotent. 830 func (m *OverlayMedium) Set(v string) error { 831 switch OverlayMedium(v) { 832 case NoOverlay, MemoryOverlay, SelfOverlay: // OK 833 default: 834 if !strings.HasPrefix(v, AnonOverlayPrefix) { 835 return fmt.Errorf("unexpected medium: %q", v) 836 } 837 if hostFileDir := strings.TrimPrefix(v, AnonOverlayPrefix); !filepath.IsAbs(hostFileDir) { 838 return fmt.Errorf("overlay host file directory should be an absolute path, got %q", hostFileDir) 839 } 840 } 841 *m = OverlayMedium(v) 842 return nil 843 } 844 845 // IsBackedByAnon indicates whether the overlaid mount is backed by a host file 846 // in an anonymous directory. 847 func (m OverlayMedium) IsBackedByAnon() bool { 848 return strings.HasPrefix(string(m), AnonOverlayPrefix) 849 } 850 851 // HostFileDir indicates the directory in which the overlay-backing host file 852 // should be created. 853 // 854 // Precondition: m.IsBackedByAnon(). 855 func (m OverlayMedium) HostFileDir() string { 856 if !m.IsBackedByAnon() { 857 panic(fmt.Sprintf("anonymous overlay medium = %q does not have %v prefix", m, AnonOverlayPrefix)) 858 } 859 return strings.TrimPrefix(string(m), AnonOverlayPrefix) 860 } 861 862 // Overlay2 holds the configuration for setting up overlay filesystems for the 863 // container. 864 type Overlay2 struct { 865 rootMount bool 866 subMounts bool 867 medium OverlayMedium 868 } 869 870 func defaultOverlay2() *Overlay2 { 871 // Rootfs overlay is enabled by default and backed by a file in rootfs itself. 
872 return &Overlay2{rootMount: true, subMounts: false, medium: SelfOverlay} 873 } 874 875 // Set implements flag.Value. Set(String()) should be idempotent. 876 func (o *Overlay2) Set(v string) error { 877 if v == "none" { 878 o.rootMount = false 879 o.subMounts = false 880 o.medium = NoOverlay 881 return nil 882 } 883 vs := strings.Split(v, ":") 884 if len(vs) != 2 { 885 return fmt.Errorf("expected format is --overlay2={mount}:{medium}, got %q", v) 886 } 887 888 switch mount := vs[0]; mount { 889 case "root": 890 o.rootMount = true 891 case "all": 892 o.rootMount = true 893 o.subMounts = true 894 default: 895 return fmt.Errorf("unexpected mount specifier for --overlay2: %q", mount) 896 } 897 898 return o.medium.Set(vs[1]) 899 } 900 901 // Get implements flag.Value. 902 func (o *Overlay2) Get() any { 903 return *o 904 } 905 906 // String implements flag.Value. 907 func (o Overlay2) String() string { 908 if !o.rootMount && !o.subMounts { 909 return "none" 910 } 911 res := "" 912 switch { 913 case o.rootMount && o.subMounts: 914 res = "all" 915 case o.rootMount: 916 res = "root" 917 default: 918 panic("invalid state of subMounts = true and rootMount = false") 919 } 920 return res + ":" + o.medium.String() 921 } 922 923 // Enabled returns true if the overlay option is enabled for any mounts. 924 func (o *Overlay2) Enabled() bool { 925 return o.medium != NoOverlay 926 } 927 928 // RootOverlayMedium returns the overlay medium config of the root mount. 929 func (o *Overlay2) RootOverlayMedium() OverlayMedium { 930 if !o.rootMount { 931 return NoOverlay 932 } 933 return o.medium 934 } 935 936 // SubMountOverlayMedium returns the overlay medium config of submounts. 937 func (o *Overlay2) SubMountOverlayMedium() OverlayMedium { 938 if !o.subMounts { 939 return NoOverlay 940 } 941 return o.medium 942 } 943 944 // XDP holds configuration for whether and how to use XDP. 
type XDP struct {
	// Mode selects how XDP is used.
	Mode XDPMode
	// IfaceName is the interface name given after the ':' for the redirect
	// and tunnel modes.
	IfaceName string
}

// XDPMode specifies a particular use of XDP.
type XDPMode int

const (
	// XDPModeOff doesn't use XDP.
	XDPModeOff XDPMode = iota

	// XDPModeNS uses an AF_XDP socket to read from the VETH device inside
	// the container's network namespace.
	XDPModeNS

	// XDPModeRedirect uses an AF_XDP socket on the host NIC to bypass the
	// Linux network stack.
	XDPModeRedirect

	// XDPModeTunnel uses XDP_REDIRECT to redirect packets directly from the
	// host NIC to the VETH device inside the container's network
	// namespace. Packets are read from the VETH via AF_XDP, as in
	// XDPModeNS.
	XDPModeTunnel
)

// Textual names of the XDP modes as they appear in the flag value.
const (
	xdpModeStrOff      = "off"
	xdpModeStrNS       = "ns"
	xdpModeStrRedirect = "redirect"
	xdpModeStrTunnel   = "tunnel"
)

var xdpConfig XDP

// Get implements flag.Getter by returning a copy of the configuration.
func (xd *XDP) Get() any {
	return *xd
}

// String implements flag.Getter. The off and ns modes render as their bare
// name; redirect and tunnel render as "{mode}:{interface}". It panics on an
// unknown mode.
func (xd *XDP) String() string {
	if xd.Mode == XDPModeOff {
		return xdpModeStrOff
	}
	if xd.Mode == XDPModeNS {
		return xdpModeStrNS
	}
	if xd.Mode == XDPModeRedirect {
		return xdpModeStrRedirect + ":" + xd.IfaceName
	}
	if xd.Mode == XDPModeTunnel {
		return xdpModeStrTunnel + ":" + xd.IfaceName
	}
	panic(fmt.Sprintf("unknown mode %d", xd.Mode))
}

// Set implements flag.Getter.
1003 func (xd *XDP) Set(input string) error { 1004 parts := strings.Split(input, ":") 1005 if len(parts) > 2 { 1006 return fmt.Errorf("invalid --xdp value: %q", input) 1007 } 1008 1009 switch { 1010 case input == xdpModeStrOff: 1011 xd.Mode = XDPModeOff 1012 xd.IfaceName = "" 1013 case input == xdpModeStrNS: 1014 xd.Mode = XDPModeNS 1015 xd.IfaceName = "" 1016 case len(parts) == 2 && parts[0] == xdpModeStrRedirect && parts[1] != "": 1017 xd.Mode = XDPModeRedirect 1018 xd.IfaceName = parts[1] 1019 case len(parts) == 2 && parts[0] == xdpModeStrTunnel && parts[1] != "": 1020 xd.Mode = XDPModeTunnel 1021 xd.IfaceName = parts[1] 1022 default: 1023 return fmt.Errorf("invalid --xdp value: %q", input) 1024 } 1025 return nil 1026 }