github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/runsc/config/config.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package config provides basic infrastructure to set configuration settings
    16  // for runsc. The configuration is set by flags to the command line. They can
    17  // also propagate to a different process using the same flags.
    18  package config
    19  
    20  import (
    21  	"fmt"
    22  	"path/filepath"
    23  	"runtime"
    24  	"strconv"
    25  	"strings"
    26  	"time"
    27  
    28  	"github.com/nicocha30/gvisor-ligolo/pkg/refs"
    29  	"github.com/nicocha30/gvisor-ligolo/pkg/sentry/watchdog"
    30  	"github.com/nicocha30/gvisor-ligolo/runsc/flag"
    31  	"github.com/nicocha30/gvisor-ligolo/runsc/version"
    32  )
    33  
// Config holds configuration that is not part of the runtime spec.
//
// Each field that is backed by a command-line flag carries a `flag:"name"`
// struct tag naming that flag.
//
// Follow these steps to add a new flag:
//  1. Create a new field in Config.
//  2. Add a field tag with the flag name.
//  3. Register a new flag in flags.go, with the same name, and add a
//     description.
//  4. Add any necessary validation into validate().
//  5. If adding an enum, follow the same pattern as FileAccessType.
//  6. Evaluate if the flag can be changed with OCI annotations. See
//     overrideAllowlist for more details.
type Config struct {
	// RootDir is the runtime root directory.
	RootDir string `flag:"root"`

	// Traceback changes the Go runtime's traceback level.
	Traceback string `flag:"traceback"`

	// Debug indicates that debug logging should be enabled.
	Debug bool `flag:"debug"`

	// LogFilename is the filename to log to, if not empty.
	LogFilename string `flag:"log"`

	// LogFormat is the log format.
	LogFormat string `flag:"log-format"`

	// DebugLog is the path to log debug information to, if not empty.
	DebugLog string `flag:"debug-log"`

	// DebugCommand is a comma-separated list of commands to be debugged if
	// --debug-log is also set. Empty means debug all. "!" negates the expression.
	// E.g. "create,start" or "!boot,events".
	DebugCommand string `flag:"debug-command"`

	// PanicLog is the path to log Go's runtime messages, if not empty.
	PanicLog string `flag:"panic-log"`

	// CoverageReport is the path to write Go coverage information, if not empty.
	CoverageReport string `flag:"coverage-report"`

	// DebugLogFormat is the log format for debug.
	DebugLogFormat string `flag:"debug-log-format"`

	// FileAccess indicates how the root filesystem is accessed.
	FileAccess FileAccessType `flag:"file-access"`

	// FileAccessMounts indicates how non-root volumes are accessed.
	FileAccessMounts FileAccessType `flag:"file-access-mounts"`

	// Overlay is whether to wrap all mounts in an overlay. The upper tmpfs layer
	// will be backed by application memory.
	// This flag has been replaced by Overlay2; validate() rejects a Config in
	// which both are set. Use GetOverlay2() to read the effective value.
	Overlay bool `flag:"overlay"`

	// Overlay2 holds configuration about wrapping mounts in overlayfs.
	// DO NOT call it directly, use GetOverlay2() instead.
	Overlay2 Overlay2 `flag:"overlay2"`

	// FSGoferHostUDS is deprecated: use host-uds=all.
	// When it is set, GetHostUDS() reports HostUDSOpen.
	FSGoferHostUDS bool `flag:"fsgofer-host-uds"`

	// HostUDS controls permission to access host Unix-domain sockets.
	// DO NOT call it directly, use GetHostUDS() instead.
	HostUDS HostUDS `flag:"host-uds"`

	// HostFifo controls permission to access host FIFO (or named pipes).
	HostFifo HostFifo `flag:"host-fifo"`

	// Network indicates what type of network to use.
	Network NetworkType `flag:"network"`

	// EnableRaw indicates whether raw sockets should be enabled. Raw
	// sockets are disabled by stripping CAP_NET_RAW from the list of
	// capabilities.
	EnableRaw bool `flag:"net-raw"`

	// AllowPacketEndpointWrite enables write operations on packet endpoints.
	AllowPacketEndpointWrite bool `flag:"TESTONLY-allow-packet-endpoint-write"`

	// HostGSO indicates that host segmentation offload is enabled.
	HostGSO bool `flag:"gso"`

	// GvisorGSO indicates that gVisor segmentation offload is enabled. The flag
	// retains its old name of "software" GSO for API consistency.
	GvisorGSO bool `flag:"software-gso"`

	// GvisorGROTimeout sets gVisor's generic receive offload timeout. Zero
	// bypasses GRO.
	GvisorGROTimeout time.Duration `flag:"gvisor-gro"`

	// TXChecksumOffload indicates that TX Checksum Offload is enabled.
	TXChecksumOffload bool `flag:"tx-checksum-offload"`

	// RXChecksumOffload indicates that RX Checksum Offload is enabled.
	RXChecksumOffload bool `flag:"rx-checksum-offload"`

	// QDisc indicates the type of queueing discipline to use by default
	// for non-loopback interfaces.
	QDisc QueueingDiscipline `flag:"qdisc"`

	// LogPackets indicates that all network packets should be logged.
	LogPackets bool `flag:"log-packets"`

	// PCAP is a file to which network packets should be logged in PCAP format.
	PCAP string `flag:"pcap-log"`

	// Platform is the platform to run on.
	Platform string `flag:"platform"`

	// PlatformDevicePath is the path to the device file used by the platform.
	// e.g. "/dev/kvm" for the KVM platform.
	// If unset, a sane platform-specific default will be used.
	PlatformDevicePath string `flag:"platform_device_path"`

	// MetricServer, if set, indicates that metrics should be exported on this address.
	// This may either be 1) "addr:port" to export metrics on a specific network interface address,
	// 2) ":port" for exporting metrics on all addresses, or 3) an absolute path to a Unix Domain
	// Socket.
	// The substring "%ID%" will be replaced by the container ID, and "%RUNTIME_ROOT%" by the root.
	// This flag must be specified *both* as part of the `runsc metric-server` arguments (so that the
	// metric server knows which address to bind to), and as part of the `runsc create` arguments (as
	// an indication that the container being created wishes that its metrics should be exported).
	// The value of this flag must also match across the two command lines.
	MetricServer string `flag:"metric-server"`

	// Strace indicates that strace should be enabled.
	Strace bool `flag:"strace"`

	// StraceSyscalls is the set of syscalls to trace (comma-separated values).
	// If Strace is true and this string is empty, then all syscalls will
	// be traced.
	StraceSyscalls string `flag:"strace-syscalls"`

	// StraceLogSize is the max size of data blobs to display.
	StraceLogSize uint `flag:"strace-log-size"`

	// StraceEvent indicates sending strace to events if true. Strace is
	// sent to log if false.
	StraceEvent bool `flag:"strace-event"`

	// DisableSeccomp indicates whether seccomp syscall filters should be
	// disabled. Pardon the double negation, but default to enabled is important.
	// This field carries no flag tag.
	DisableSeccomp bool

	// EnableCoreTags indicates whether the Sentry process and children will be
	// run in a core tagged process. This isolates the sentry from sharing
	// physical cores with other core tagged processes. This is useful as a
	// mitigation for hyperthreading side channel based attacks. Requires host
	// linux kernel >= 5.14.
	EnableCoreTags bool `flag:"enable-core-tags"`

	// WatchdogAction sets what action the watchdog takes when triggered.
	WatchdogAction watchdog.Action `flag:"watchdog-action"`

	// PanicSignal registers signal handling that panics. Usually set to
	// SIGUSR2(12) to troubleshoot hangs. -1 disables it.
	PanicSignal int `flag:"panic-signal"`

	// ProfileEnable is set to prepare the sandbox to be profiled.
	ProfileEnable bool `flag:"profile"`

	// ProfileBlock collects a block profile to the passed file for the
	// duration of the container execution. Requires ProfileEnable.
	ProfileBlock string `flag:"profile-block"`

	// ProfileCPU collects a CPU profile to the passed file for the
	// duration of the container execution. Requires ProfileEnable.
	ProfileCPU string `flag:"profile-cpu"`

	// ProfileHeap collects a heap profile to the passed file for the
	// duration of the container execution. Requires ProfileEnable.
	ProfileHeap string `flag:"profile-heap"`

	// ProfileMutex collects a mutex profile to the passed file for the
	// duration of the container execution. Requires ProfileEnable.
	ProfileMutex string `flag:"profile-mutex"`

	// TraceFile collects a Go runtime execution trace to the passed file
	// for the duration of the container execution.
	TraceFile string `flag:"trace"`

	// RestoreFile is the path to the saved container image.
	// This field carries no flag tag.
	RestoreFile string

	// NumNetworkChannels controls the number of AF_PACKET sockets that map
	// to the same underlying network device. This allows netstack to better
	// scale for high throughput use cases. validate() requires it to be
	// greater than zero.
	NumNetworkChannels int `flag:"num-network-channels"`

	// Rootless allows the sandbox to be started with a user that is not root.
	// Defense in depth measures are weaker in rootless mode. Specifically, the
	// sandbox and Gofer process run as root inside a user namespace with root
	// mapped to the caller's user. When using rootless, the container root path
	// should not have a symlink.
	Rootless bool `flag:"rootless"`

	// AlsoLogToStderr allows to send log messages to stderr.
	AlsoLogToStderr bool `flag:"alsologtostderr"`

	// ReferenceLeak sets the reference leak check mode.
	ReferenceLeak refs.LeakMode `flag:"ref-leak-mode"`

	// CPUNumFromQuota sets CPU number count to available CPU quota, using
	// least integer value greater than or equal to quota.
	//
	// E.g. 0.2 CPU quota will result in 1, and 1.9 in 2.
	CPUNumFromQuota bool `flag:"cpu-num-from-quota"`

	// AllowFlagOverride allows overriding of flags in OCI annotations.
	AllowFlagOverride bool `flag:"allow-flag-override"`

	// OCISeccomp enables seccomp inside the sandbox.
	OCISeccomp bool `flag:"oci-seccomp"`

	// Cgroupfs mounts the cgroup filesystem backed by the sentry's cgroupfs.
	Cgroupfs bool `flag:"cgroupfs"`

	// IgnoreCgroups, when set, means cgroups are not configured.
	IgnoreCgroups bool `flag:"ignore-cgroups"`

	// SystemdCgroup uses systemd to configure cgroups.
	SystemdCgroup bool `flag:"systemd-cgroup"`

	// PodInitConfig is the path to configuration file with additional steps to
	// take during pod creation.
	PodInitConfig string `flag:"pod-init-config"`

	// BufferPooling uses pools to manage buffer memory instead of heap.
	BufferPooling bool `flag:"buffer-pooling"`

	// AFXDP defines whether to use an AF_XDP socket to receive packets
	// (rather than AF_PACKET). Enabling it disables RX checksum offload.
	AFXDP bool `flag:"EXPERIMENTAL-afxdp"`

	// FDLimit specifies a limit on the number of host file descriptors that can
	// be open simultaneously by the sentry and gofer. It applies separately to
	// each.
	FDLimit int `flag:"fdlimit"`

	// DCache sets the global dirent cache size. If zero, per-mount caches are
	// used.
	DCache int `flag:"dcache"`

	// IOUring enables support for the IO_URING API calls to perform
	// asynchronous I/O operations.
	IOUring bool `flag:"iouring"`

	// DirectFS sets up the sandbox to directly access/mutate the filesystem from
	// the sentry. Sentry runs with escalated privileges. Gofer process still
	// exists, but is mostly idle. Not supported in rootless mode.
	DirectFS bool `flag:"directfs"`

	// NVProxy enables support for Nvidia GPUs.
	NVProxy bool `flag:"nvproxy"`

	// NVProxyDocker exposes GPUs to containers based on the
	// NVIDIA_VISIBLE_DEVICES container environment variable, as requested by
	// containers or set by `docker --gpus`.
	NVProxyDocker bool `flag:"nvproxy-docker"`

	// TPUProxy enables support for TPUs.
	TPUProxy bool `flag:"tpuproxy"`

	// TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in
	// tests. It allows runsc to start the sandbox process as the current
	// user, and without chrooting the sandbox process. This can be
	// necessary in test environments that have limited capabilities. When
	// disabling chroot, the container root path should not have a symlink.
	TestOnlyAllowRunAsCurrentUserWithoutChroot bool `flag:"TESTONLY-unsafe-nonroot"`

	// TestOnlyTestNameEnv should only be used in tests. It looks up for the
	// test name in the container environment variables and adds it to the debug
	// log file name. This is done to help identify the log with the test when
	// multiple tests are run in parallel, since there is no way to pass
	// parameters to the runtime from docker.
	TestOnlyTestNameEnv string `flag:"TESTONLY-test-name-env"`

	// TestOnlyAFSSyscallPanic should only be used in tests. It enables the
	// alternate behaviour for afs_syscall to trigger a Go-runtime panic upon being
	// called. This is useful for tests exercising gVisor panic-reporting.
	TestOnlyAFSSyscallPanic bool `flag:"TESTONLY-afs-syscall-panic"`

	// explicitlySet contains whether a flag was explicitly set on the command-line from which this
	// Config was constructed. Nil when the Config was not initialized from a FlagSet.
	explicitlySet map[string]struct{}
}
   319  
   320  func (c *Config) validate() error {
   321  	if c.Overlay && c.Overlay2.Enabled() {
   322  		// Deprecated flag was used together with flag that replaced it.
   323  		return fmt.Errorf("overlay flag has been replaced with overlay2 flag")
   324  	}
   325  	if overlay2 := c.GetOverlay2(); c.FileAccess == FileAccessShared && overlay2.Enabled() {
   326  		return fmt.Errorf("overlay flag is incompatible with shared file access for rootfs")
   327  	}
   328  	if c.NumNetworkChannels <= 0 {
   329  		return fmt.Errorf("num_network_channels must be > 0, got: %d", c.NumNetworkChannels)
   330  	}
   331  	// Require profile flags to explicitly opt-in to profiling with
   332  	// -profile rather than implying it since these options have security
   333  	// implications.
   334  	if c.ProfileBlock != "" && !c.ProfileEnable {
   335  		return fmt.Errorf("profile-block flag requires enabling profiling with profile flag")
   336  	}
   337  	if c.ProfileCPU != "" && !c.ProfileEnable {
   338  		return fmt.Errorf("profile-cpu flag requires enabling profiling with profile flag")
   339  	}
   340  	if c.ProfileHeap != "" && !c.ProfileEnable {
   341  		return fmt.Errorf("profile-heap flag requires enabling profiling with profile flag")
   342  	}
   343  	if c.ProfileMutex != "" && !c.ProfileEnable {
   344  		return fmt.Errorf("profile-mutex flag requires enabling profiling with profile flag")
   345  	}
   346  	if c.FSGoferHostUDS && c.HostUDS != HostUDSNone {
   347  		// Deprecated flag was used together with flag that replaced it.
   348  		return fmt.Errorf("fsgofer-host-uds has been replaced with host-uds flag")
   349  	}
   350  	return nil
   351  }
   352  
   353  // GetHostUDS returns the FS gofer communication that is allowed, taking into
   354  // consideration all flags what affect the result.
   355  func (c *Config) GetHostUDS() HostUDS {
   356  	if c.FSGoferHostUDS {
   357  		if c.HostUDS != HostUDSNone {
   358  			panic(fmt.Sprintf("HostUDS cannot be set when --fsgofer-host-uds=true"))
   359  		}
   360  		// Using deprecated flag, honor it to avoid breaking users.
   361  		return HostUDSOpen
   362  	}
   363  	return c.HostUDS
   364  }
   365  
   366  // GetOverlay2 returns the overlay configuration, taking into consideration all
   367  // flags that affect the result.
   368  func (c *Config) GetOverlay2() Overlay2 {
   369  	if c.Overlay {
   370  		if c.Overlay2.Enabled() {
   371  			panic(fmt.Sprintf("Overlay2 cannot be set when --overlay=true"))
   372  		}
   373  		// Using a deprecated flag, honor it to avoid breaking users.
   374  		return Overlay2{rootMount: true, subMounts: true, medium: "memory"}
   375  	}
   376  	return c.Overlay2
   377  }
   378  
// Bundle is a set of flag name-value pairs.
// Each key is a flag name and each value is the string to assign to that
// flag; Validate checks both against the flags registered by RegisterFlags.
type Bundle map[string]string

// BundleName is a human-friendly name for a Bundle.
// It is used as part of an annotation to specify that the user wants to apply a Bundle.
type BundleName string
   385  
   386  // Validate validates that given flag string values map to actual flags in runsc.
   387  func (b Bundle) Validate() error {
   388  	flagSet := flag.NewFlagSet("tmp", flag.ContinueOnError)
   389  	RegisterFlags(flagSet)
   390  	for key, val := range b {
   391  		flag := flagSet.Lookup(key)
   392  		if flag == nil {
   393  			return fmt.Errorf("unknown flag %q", key)
   394  		}
   395  		if err := flagSet.Set(key, val); err != nil {
   396  			return err
   397  		}
   398  	}
   399  	return nil
   400  }
   401  
// MetricMetadataKeys is the set of keys of metric metadata labels
// as returned by `Config.MetricMetadata`.
// Keep this list in sync with the map literal built by that method.
var MetricMetadataKeys = []string{
	"version",
	"platform",
	"network",
	"numcores",
	"coretags",
	"overlay",
	"fsmode",
	"cpuarch",
	"go",
	"experiment",
}
   416  
   417  // MetricMetadata returns key-value pairs that are useful to include in metrics
   418  // exported about the sandbox this config represents.
   419  // It must return the same set of labels as listed in `MetricMetadataKeys`.
   420  func (c *Config) MetricMetadata() map[string]string {
   421  	var fsMode = "goferfs"
   422  	if c.DirectFS {
   423  		fsMode = "directfs"
   424  	}
   425  	return map[string]string{
   426  		"version":  version.Version(),
   427  		"platform": c.Platform,
   428  		"network":  c.Network.String(),
   429  		"numcores": strconv.Itoa(runtime.NumCPU()),
   430  		"coretags": strconv.FormatBool(c.EnableCoreTags),
   431  		"overlay":  c.Overlay2.String(),
   432  		"fsmode":   fsMode,
   433  		"cpuarch":  runtime.GOARCH,
   434  		"go":       runtime.Version(),
   435  		// The "experiment" label is currently unused, but may be used to contain
   436  		// extra information about e.g. an experiment that may be enabled.
   437  		"experiment": "",
   438  	}
   439  }
   440  
   441  // FileAccessType tells how the filesystem is accessed.
   442  type FileAccessType int
   443  
   444  const (
   445  	// FileAccessExclusive gives the sandbox exclusive access over files and
   446  	// directories in the filesystem. No external modifications are permitted and
   447  	// can lead to undefined behavior.
   448  	//
   449  	// Exclusive filesystem access enables more aggressive caching and offers
   450  	// significantly better performance. This is the default mode for the root
   451  	// volume.
   452  	FileAccessExclusive FileAccessType = iota
   453  
   454  	// FileAccessShared is used for volumes that can have external changes. It
   455  	// requires revalidation on every filesystem access to detect external
   456  	// changes, and reduces the amount of caching that can be done. This is the
   457  	// default mode for non-root volumes.
   458  	FileAccessShared
   459  )
   460  
   461  func fileAccessTypePtr(v FileAccessType) *FileAccessType {
   462  	return &v
   463  }
   464  
   465  // Set implements flag.Value.
   466  func (f *FileAccessType) Set(v string) error {
   467  	switch v {
   468  	case "shared":
   469  		*f = FileAccessShared
   470  	case "exclusive":
   471  		*f = FileAccessExclusive
   472  	default:
   473  		return fmt.Errorf("invalid file access type %q", v)
   474  	}
   475  	return nil
   476  }
   477  
   478  // Get implements flag.Value.
   479  func (f *FileAccessType) Get() any {
   480  	return *f
   481  }
   482  
   483  // String implements flag.Value.
   484  func (f FileAccessType) String() string {
   485  	switch f {
   486  	case FileAccessShared:
   487  		return "shared"
   488  	case FileAccessExclusive:
   489  		return "exclusive"
   490  	}
   491  	panic(fmt.Sprintf("Invalid file access type %d", f))
   492  }
   493  
   494  // NetworkType tells which network stack to use.
   495  type NetworkType int
   496  
   497  const (
   498  	// NetworkSandbox uses internal network stack, isolated from the host.
   499  	NetworkSandbox NetworkType = iota
   500  
   501  	// NetworkHost redirects network related syscalls to the host network.
   502  	NetworkHost
   503  
   504  	// NetworkNone sets up just loopback using netstack.
   505  	NetworkNone
   506  )
   507  
   508  func networkTypePtr(v NetworkType) *NetworkType {
   509  	return &v
   510  }
   511  
   512  // Set implements flag.Value.
   513  func (n *NetworkType) Set(v string) error {
   514  	switch v {
   515  	case "sandbox":
   516  		*n = NetworkSandbox
   517  	case "host":
   518  		*n = NetworkHost
   519  	case "none":
   520  		*n = NetworkNone
   521  	default:
   522  		return fmt.Errorf("invalid network type %q", v)
   523  	}
   524  	return nil
   525  }
   526  
   527  // Get implements flag.Value.
   528  func (n *NetworkType) Get() any {
   529  	return *n
   530  }
   531  
   532  // String implements flag.Value.
   533  func (n NetworkType) String() string {
   534  	switch n {
   535  	case NetworkSandbox:
   536  		return "sandbox"
   537  	case NetworkHost:
   538  		return "host"
   539  	case NetworkNone:
   540  		return "none"
   541  	}
   542  	panic(fmt.Sprintf("Invalid network type %d", n))
   543  }
   544  
   545  // QueueingDiscipline is used to specify the kind of Queueing Discipline to
   546  // apply for a give FDBasedLink.
   547  type QueueingDiscipline int
   548  
   549  const (
   550  	// QDiscNone disables any queueing for the underlying FD.
   551  	QDiscNone QueueingDiscipline = iota
   552  
   553  	// QDiscFIFO applies a simple fifo based queue to the underlying FD.
   554  	QDiscFIFO
   555  )
   556  
   557  func queueingDisciplinePtr(v QueueingDiscipline) *QueueingDiscipline {
   558  	return &v
   559  }
   560  
   561  // Set implements flag.Value.
   562  func (q *QueueingDiscipline) Set(v string) error {
   563  	switch v {
   564  	case "none":
   565  		*q = QDiscNone
   566  	case "fifo":
   567  		*q = QDiscFIFO
   568  	default:
   569  		return fmt.Errorf("invalid qdisc %q", v)
   570  	}
   571  	return nil
   572  }
   573  
   574  // Get implements flag.Value.
   575  func (q *QueueingDiscipline) Get() any {
   576  	return *q
   577  }
   578  
   579  // String implements flag.Value.
   580  func (q QueueingDiscipline) String() string {
   581  	switch q {
   582  	case QDiscNone:
   583  		return "none"
   584  	case QDiscFIFO:
   585  		return "fifo"
   586  	}
   587  	panic(fmt.Sprintf("Invalid qdisc %d", q))
   588  }
   589  
   590  func leakModePtr(v refs.LeakMode) *refs.LeakMode {
   591  	return &v
   592  }
   593  
   594  func watchdogActionPtr(v watchdog.Action) *watchdog.Action {
   595  	return &v
   596  }
   597  
   598  // HostUDS tells how much of the host UDS the file system has access to.
   599  type HostUDS int
   600  
   601  const (
   602  	// HostUDSNone doesn't allows UDS from the host to be manipulated.
   603  	HostUDSNone HostUDS = 0x0
   604  
   605  	// HostUDSOpen allows UDS from the host to be opened, e.g. connect(2).
   606  	HostUDSOpen HostUDS = 0x1
   607  
   608  	// HostUDSCreate allows UDS from the host to be created, e.g. bind(2).
   609  	HostUDSCreate HostUDS = 0x2
   610  
   611  	// HostUDSAll allows all form of communication with the host through UDS.
   612  	HostUDSAll = HostUDSOpen | HostUDSCreate
   613  )
   614  
   615  func hostUDSPtr(v HostUDS) *HostUDS {
   616  	return &v
   617  }
   618  
   619  // Set implements flag.Value.
   620  func (g *HostUDS) Set(v string) error {
   621  	switch v {
   622  	case "", "none":
   623  		*g = HostUDSNone
   624  	case "open":
   625  		*g = HostUDSOpen
   626  	case "create":
   627  		*g = HostUDSCreate
   628  	case "all":
   629  		*g = HostUDSAll
   630  	default:
   631  		return fmt.Errorf("invalid host UDS type %q", v)
   632  	}
   633  	return nil
   634  }
   635  
   636  // Get implements flag.Value.
   637  func (g *HostUDS) Get() any {
   638  	return *g
   639  }
   640  
   641  // String implements flag.Value.
   642  func (g HostUDS) String() string {
   643  	// Note: the order of operations is important given that HostUDS is a bitmap.
   644  	if g == HostUDSNone {
   645  		return "none"
   646  	}
   647  	if g == HostUDSAll {
   648  		return "all"
   649  	}
   650  	if g == HostUDSOpen {
   651  		return "open"
   652  	}
   653  	if g == HostUDSCreate {
   654  		return "create"
   655  	}
   656  	panic(fmt.Sprintf("Invalid host UDS type %d", g))
   657  }
   658  
   659  // AllowOpen returns true if it can consume UDS from the host.
   660  func (g HostUDS) AllowOpen() bool {
   661  	return g&HostUDSOpen != 0
   662  }
   663  
   664  // AllowCreate returns true if it can create UDS in the host.
   665  func (g HostUDS) AllowCreate() bool {
   666  	return g&HostUDSCreate != 0
   667  }
   668  
   669  // HostFifo tells how much of the host FIFO (or named pipes) the file system has
   670  // access to.
   671  type HostFifo int
   672  
   673  const (
   674  	// HostFifoNone doesn't allow FIFO from the host to be manipulated.
   675  	HostFifoNone HostFifo = 0x0
   676  
   677  	// HostFifoOpen allows FIFOs from the host to be opened.
   678  	HostFifoOpen HostFifo = 0x1
   679  )
   680  
   681  func hostFifoPtr(v HostFifo) *HostFifo {
   682  	return &v
   683  }
   684  
   685  // Set implements flag.Value.
   686  func (g *HostFifo) Set(v string) error {
   687  	switch v {
   688  	case "", "none":
   689  		*g = HostFifoNone
   690  	case "open":
   691  		*g = HostFifoOpen
   692  	default:
   693  		return fmt.Errorf("invalid host fifo type %q", v)
   694  	}
   695  	return nil
   696  }
   697  
   698  // Get implements flag.Value.
   699  func (g *HostFifo) Get() any {
   700  	return *g
   701  }
   702  
   703  // String implements flag.Value.
   704  func (g HostFifo) String() string {
   705  	if g == HostFifoNone {
   706  		return "none"
   707  	}
   708  	if g == HostFifoOpen {
   709  		return "open"
   710  	}
   711  	panic(fmt.Sprintf("Invalid host fifo type %d", g))
   712  }
   713  
   714  // AllowOpen returns true if it can consume FIFOs from the host.
   715  func (g HostFifo) AllowOpen() bool {
   716  	return g&HostFifoOpen != 0
   717  }
   718  
   719  // Overlay2 holds the configuration for setting up overlay filesystems for the
   720  // container.
   721  type Overlay2 struct {
   722  	rootMount bool
   723  	subMounts bool
   724  	medium    string
   725  }
   726  
   727  func defaultOverlay2() *Overlay2 {
   728  	// Rootfs overlay is enabled by default and backed by a file in rootfs itself.
   729  	return &Overlay2{rootMount: true, subMounts: false, medium: "self"}
   730  }
   731  
   732  // Set implements flag.Value.
   733  func (o *Overlay2) Set(v string) error {
   734  	if v == "none" {
   735  		o.rootMount = false
   736  		o.subMounts = false
   737  		o.medium = ""
   738  		return nil
   739  	}
   740  	vs := strings.Split(v, ":")
   741  	if len(vs) != 2 {
   742  		return fmt.Errorf("expected format is --overlay2={mount}:{medium}, got %q", v)
   743  	}
   744  
   745  	switch mount := vs[0]; mount {
   746  	case "root":
   747  		o.rootMount = true
   748  	case "all":
   749  		o.rootMount = true
   750  		o.subMounts = true
   751  	default:
   752  		return fmt.Errorf("unexpected mount specifier for --overlay2: %q", mount)
   753  	}
   754  
   755  	o.medium = vs[1]
   756  	switch o.medium {
   757  	case "memory", "self": // OK
   758  	default:
   759  		if !strings.HasPrefix(o.medium, "dir=") {
   760  			return fmt.Errorf("unexpected medium specifier for --overlay2: %q", o.medium)
   761  		}
   762  		if hostFileDir := strings.TrimPrefix(o.medium, "dir="); !filepath.IsAbs(hostFileDir) {
   763  			return fmt.Errorf("overlay host file directory should be an absolute path, got %q", hostFileDir)
   764  		}
   765  	}
   766  	return nil
   767  }
   768  
   769  // Get implements flag.Value.
   770  func (o *Overlay2) Get() any {
   771  	return *o
   772  }
   773  
   774  // String implements flag.Value.
   775  func (o Overlay2) String() string {
   776  	if !o.rootMount && !o.subMounts {
   777  		return "none"
   778  	}
   779  	res := ""
   780  	switch {
   781  	case o.rootMount && o.subMounts:
   782  		res = "all"
   783  	case o.rootMount:
   784  		res = "root"
   785  	default:
   786  		panic("invalid state of subMounts = true and rootMount = false")
   787  	}
   788  
   789  	return res + ":" + o.medium
   790  }
   791  
   792  // Enabled returns true if the overlay option is enabled for any mounts.
   793  func (o *Overlay2) Enabled() bool {
   794  	return o.rootMount || o.subMounts
   795  }
   796  
   797  // RootEnabled returns true if the overlay is enabled for the root mount.
   798  func (o *Overlay2) RootEnabled() bool {
   799  	return o.rootMount
   800  }
   801  
   802  // SubMountEnabled returns true if the overlay is enabled for submounts.
   803  func (o *Overlay2) SubMountEnabled() bool {
   804  	return o.subMounts
   805  }
   806  
   807  // IsBackedByMemory indicates whether the overlay is backed by app memory.
   808  func (o *Overlay2) IsBackedByMemory() bool {
   809  	return o.Enabled() && o.medium == "memory"
   810  }
   811  
   812  // IsBackedBySelf indicates whether the overlaid mounts are backed by
   813  // themselves.
   814  func (o *Overlay2) IsBackedBySelf() bool {
   815  	return o.Enabled() && o.medium == "self"
   816  }
   817  
   818  // HostFileDir indicates the directory in which the overlay-backing host file
   819  // should be created.
   820  //
   821  // Precondition: o.IsBackedByHostFile() && !o.IsBackedBySelf().
   822  func (o *Overlay2) HostFileDir() string {
   823  	if !strings.HasPrefix(o.medium, "dir=") {
   824  		panic(fmt.Sprintf("Overlay2.Medium = %q does not have dir= prefix when overlay is backed by a host file", o.medium))
   825  	}
   826  	hostFileDir := strings.TrimPrefix(o.medium, "dir=")
   827  	if !filepath.IsAbs(hostFileDir) {
   828  		panic(fmt.Sprintf("overlay host file directory should be an absolute path, got %q", hostFileDir))
   829  	}
   830  	return hostFileDir
   831  }