github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/runsc/config/config.go

github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/runsc/config/config.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package config provides basic infrastructure to set configuration settings
    16  // for runsc. The configuration is set by flags to the command line. They can
    17  // also propagate to a different process using the same flags.
    18  package config
    19  
    20  import (
    21  	"fmt"
    22  
    23  	"github.com/SagerNet/gvisor/pkg/refs"
    24  	"github.com/SagerNet/gvisor/pkg/sentry/watchdog"
    25  )
    26  
    27  // Config holds configuration that is not part of the runtime spec.
    28  //
    29  // Follow these steps to add a new flag:
    30  //   1. Create a new field in Config.
    31  //   2. Add a field tag with the flag name.
    32  //   3. Register a new flag in flags.go, with name and description.
    33  //   4. Add any necessary validation into validate().
    34  //   5. If adding an enum, follow the same pattern as FileAccessType.
    35  //
    36  type Config struct {
    37  	// RootDir is the runtime root directory.
    38  	RootDir string `flag:"root"`
    39  
    40  	// Traceback changes the Go runtime's traceback level.
    41  	Traceback string `flag:"traceback"`
    42  
    43  	// Debug indicates that debug logging should be enabled.
    44  	Debug bool `flag:"debug"`
    45  
    46  	// LogFilename is the filename to log to, if not empty.
    47  	LogFilename string `flag:"log"`
    48  
    49  	// LogFormat is the log format.
    50  	LogFormat string `flag:"log-format"`
    51  
    52  	// DebugLog is the path to log debug information to, if not empty.
    53  	DebugLog string `flag:"debug-log"`
    54  
    55  	// PanicLog is the path to log Go's runtime messages to, if not empty.
    56  	PanicLog string `flag:"panic-log"`
    57  
    58  	// CoverageReport is the path to write Go coverage information, if not empty.
    59  	CoverageReport string `flag:"coverage-report"`
    60  
    61  	// DebugLogFormat is the log format for debug.
    62  	DebugLogFormat string `flag:"debug-log-format"`
    63  
    64  	// FileAccess indicates how the root filesystem is accessed.
    65  	FileAccess FileAccessType `flag:"file-access"`
    66  
    67  	// FileAccessMounts indicates how non-root volumes are accessed.
    68  	FileAccessMounts FileAccessType `flag:"file-access-mounts"`
    69  
    70  	// Overlay indicates whether to wrap the root filesystem in an overlay.
    71  	Overlay bool `flag:"overlay"`
    72  
    73  	// Verity indicates whether there are one or more verity file systems to mount.
    74  	Verity bool `flag:"verity"`
    75  
    76  	// FSGoferHostUDS enables the gofer to mount a host UDS.
    77  	FSGoferHostUDS bool `flag:"fsgofer-host-uds"`
    78  
    79  	// Network indicates what type of network to use.
    80  	Network NetworkType `flag:"network"`
    81  
    82  	// EnableRaw indicates whether raw sockets should be enabled. Raw
    83  	// sockets are disabled by stripping CAP_NET_RAW from the list of
    84  	// capabilities.
    85  	EnableRaw bool `flag:"net-raw"`
    86  
    87  	// HardwareGSO indicates that hardware segmentation offload is enabled.
    88  	HardwareGSO bool `flag:"gso"`
    89  
    90  	// SoftwareGSO indicates that software segmentation offload is enabled.
    91  	SoftwareGSO bool `flag:"software-gso"`
    92  
    93  	// TXChecksumOffload indicates that TX Checksum Offload is enabled.
    94  	TXChecksumOffload bool `flag:"tx-checksum-offload"`
    95  
    96  	// RXChecksumOffload indicates that RX Checksum Offload is enabled.
    97  	RXChecksumOffload bool `flag:"rx-checksum-offload"`
    98  
    99  	// QDisc indicates the type of queueing discipline to use by default
   100  	// for non-loopback interfaces.
   101  	QDisc QueueingDiscipline `flag:"qdisc"`
   102  
   103  	// LogPackets indicates that all network packets should be logged.
   104  	LogPackets bool `flag:"log-packets"`
   105  
   106  	// Platform is the platform to run on.
   107  	Platform string `flag:"platform"`
   108  
   109  	// Strace indicates that strace should be enabled.
   110  	Strace bool `flag:"strace"`
   111  
   112  	// StraceSyscalls is the set of syscalls to trace (comma-separated values).
   113  	// If Strace is true and this string is empty, then all syscalls will
   114  	// be traced.
   115  	StraceSyscalls string `flag:"strace-syscalls"`
   116  
   117  	// StraceLogSize is the max size of data blobs to display.
   118  	StraceLogSize uint `flag:"strace-log-size"`
   119  
   120  	// DisableSeccomp indicates whether seccomp syscall filters should be
   121  	// disabled. Pardon the double negation, but default to enabled is important.
   122  	DisableSeccomp bool
   123  
   124  	// WatchdogAction sets what action the watchdog takes when triggered.
   125  	WatchdogAction watchdog.Action `flag:"watchdog-action"`
   126  
   127  	// PanicSignal registers signal handling that panics. Usually set to
   128  	// SIGUSR2(12) to troubleshoot hangs. -1 disables it.
   129  	PanicSignal int `flag:"panic-signal"`
   130  
   131  	// ProfileEnable is set to prepare the sandbox to be profiled.
   132  	ProfileEnable bool `flag:"profile"`
   133  
   134  	// RestoreFile is the path to the saved container image.
   135  	RestoreFile string
   136  
   137  	// NumNetworkChannels controls the number of AF_PACKET sockets that map
   138  	// to the same underlying network device. This allows netstack to better
   139  	// scale for high throughput use cases.
   140  	NumNetworkChannels int `flag:"num-network-channels"`
   141  
   142  	// Rootless allows the sandbox to be started with a user that is not root.
   143  	// Defense in depth measures are weaker in rootless mode. Specifically, the
   144  	// sandbox and Gofer process run as root inside a user namespace with root
   145  	// mapped to the caller's user.
   146  	Rootless bool `flag:"rootless"`
   147  
   148  	// AlsoLogToStderr allows log messages to be sent to stderr.
   149  	AlsoLogToStderr bool `flag:"alsologtostderr"`
   150  
   151  	// ReferenceLeak sets the reference leak check mode.
   152  	ReferenceLeak refs.LeakMode `flag:"ref-leak-mode"`
   153  
   154  	// CPUNumFromQuota sets the CPU count to the available CPU quota, using
   155  	// the least integer value greater than or equal to the quota.
   156  	//
   157  	// E.g. 0.2 CPU quota will result in 1, and 1.9 in 2.
   158  	CPUNumFromQuota bool `flag:"cpu-num-from-quota"`
   159  
   160  	// VFS2 enables VFS2.
   161  	VFS2 bool `flag:"vfs2"`
   162  
   163  	// FUSE enables FUSE usage.
   164  	FUSE bool `flag:"fuse"`
   165  
   166  	// AllowFlagOverride allows overriding of flags in OCI annotations.
   167  	AllowFlagOverride bool `flag:"allow-flag-override"`
   168  
   169  	// OCISeccomp enables seccomp inside the sandbox.
   170  	OCISeccomp bool `flag:"oci-seccomp"`
   171  
   172  	// Cgroupfs mounts the cgroup filesystem backed by the sentry's cgroupfs.
   173  	Cgroupfs bool `flag:"cgroupfs"`
   174  
   175  	// TestOnlyAllowRunAsCurrentUserWithoutChroot should only be used in
   176  	// tests. It allows runsc to start the sandbox process as the current
   177  	// user, and without chrooting the sandbox process. This can be
   178  	// necessary in test environments that have limited capabilities.
   179  	TestOnlyAllowRunAsCurrentUserWithoutChroot bool `flag:"TESTONLY-unsafe-nonroot"`
   180  
   181  	// TestOnlyTestNameEnv should only be used in tests. It looks up the
   182  	// test name in the container environment variables and adds it to the debug
   183  	// log file name. This is done to help identify the log with the test when
   184  	// multiple tests are run in parallel, since there is no way to pass
   185  	// parameters to the runtime from docker.
   186  	TestOnlyTestNameEnv string `flag:"TESTONLY-test-name-env"`
   187  }
   188  
   189  func (c *Config) validate() error {
   190  	if c.FileAccess == FileAccessShared && c.Overlay {
   191  		return fmt.Errorf("overlay flag is incompatible with shared file access")
   192  	}
   193  	if c.NumNetworkChannels <= 0 {
   194  		return fmt.Errorf("num_network_channels must be > 0, got: %d", c.NumNetworkChannels)
   195  	}
   196  	return nil
   197  }
   198  
   199  // FileAccessType tells how the filesystem is accessed.
   200  type FileAccessType int
   201  
   202  const (
   203  	// FileAccessExclusive gives the sandbox exclusive access over files and
   204  	// directories in the filesystem. No external modifications are permitted and
   205  	// can lead to undefined behavior.
   206  	//
   207  	// Exclusive filesystem access enables more aggressive caching and offers
   208  	// significantly better performance. This is the default mode for the root
   209  	// volume.
   210  	FileAccessExclusive FileAccessType = iota
   211  
   212  	// FileAccessShared is used for volumes that can have external changes. It
   213  	// requires revalidation on every filesystem access to detect external
   214  	// changes, and reduces the amount of caching that can be done. This is the
   215  	// default mode for non-root volumes.
   216  	FileAccessShared
   217  )
   218  
   219  func fileAccessTypePtr(v FileAccessType) *FileAccessType {
   220  	return &v
   221  }
   222  
   223  // Set implements flag.Value.
   224  func (f *FileAccessType) Set(v string) error {
   225  	switch v {
   226  	case "shared":
   227  		*f = FileAccessShared
   228  	case "exclusive":
   229  		*f = FileAccessExclusive
   230  	default:
   231  		return fmt.Errorf("invalid file access type %q", v)
   232  	}
   233  	return nil
   234  }
   235  
   236  // Get implements flag.Value.
   237  func (f *FileAccessType) Get() interface{} {
   238  	return *f
   239  }
   240  
   241  // String implements flag.Value.
   242  func (f FileAccessType) String() string {
   243  	switch f {
   244  	case FileAccessShared:
   245  		return "shared"
   246  	case FileAccessExclusive:
   247  		return "exclusive"
   248  	}
   249  	panic(fmt.Sprintf("Invalid file access type %d", f))
   250  }
   251  
   252  // NetworkType tells which network stack to use.
   253  type NetworkType int
   254  
   255  const (
   256  	// NetworkSandbox uses internal network stack, isolated from the host.
   257  	NetworkSandbox NetworkType = iota
   258  
   259  	// NetworkHost redirects network related syscalls to the host network.
   260  	NetworkHost
   261  
   262  	// NetworkNone sets up just loopback using netstack.
   263  	NetworkNone
   264  )
   265  
   266  func networkTypePtr(v NetworkType) *NetworkType {
   267  	return &v
   268  }
   269  
   270  // Set implements flag.Value.
   271  func (n *NetworkType) Set(v string) error {
   272  	switch v {
   273  	case "sandbox":
   274  		*n = NetworkSandbox
   275  	case "host":
   276  		*n = NetworkHost
   277  	case "none":
   278  		*n = NetworkNone
   279  	default:
   280  		return fmt.Errorf("invalid network type %q", v)
   281  	}
   282  	return nil
   283  }
   284  
   285  // Get implements flag.Value.
   286  func (n *NetworkType) Get() interface{} {
   287  	return *n
   288  }
   289  
   290  // String implements flag.Value.
   291  func (n NetworkType) String() string {
   292  	switch n {
   293  	case NetworkSandbox:
   294  		return "sandbox"
   295  	case NetworkHost:
   296  		return "host"
   297  	case NetworkNone:
   298  		return "none"
   299  	}
   300  	panic(fmt.Sprintf("Invalid network type %d", n))
   301  }
   302  
   303  // QueueingDiscipline is used to specify the kind of Queueing Discipline to
   304  // apply for a give FDBasedLink.
   305  type QueueingDiscipline int
   306  
   307  const (
   308  	// QDiscNone disables any queueing for the underlying FD.
   309  	QDiscNone QueueingDiscipline = iota
   310  
   311  	// QDiscFIFO applies a simple fifo based queue to the underlying FD.
   312  	QDiscFIFO
   313  )
   314  
   315  func queueingDisciplinePtr(v QueueingDiscipline) *QueueingDiscipline {
   316  	return &v
   317  }
   318  
   319  // Set implements flag.Value.
   320  func (q *QueueingDiscipline) Set(v string) error {
   321  	switch v {
   322  	case "none":
   323  		*q = QDiscNone
   324  	case "fifo":
   325  		*q = QDiscFIFO
   326  	default:
   327  		return fmt.Errorf("invalid qdisc %q", v)
   328  	}
   329  	return nil
   330  }
   331  
   332  // Get implements flag.Value.
   333  func (q *QueueingDiscipline) Get() interface{} {
   334  	return *q
   335  }
   336  
   337  // String implements flag.Value.
   338  func (q QueueingDiscipline) String() string {
   339  	switch q {
   340  	case QDiscNone:
   341  		return "none"
   342  	case QDiscFIFO:
   343  		return "fifo"
   344  	}
   345  	panic(fmt.Sprintf("Invalid qdisc %d", q))
   346  }
   347  
   348  func leakModePtr(v refs.LeakMode) *refs.LeakMode {
   349  	return &v
   350  }
   351  
   352  func watchdogActionPtr(v watchdog.Action) *watchdog.Action {
   353  	return &v
   354  }