github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/runsc/specutils/fs.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package specutils
    16  
    17  import (
    18  	"fmt"
    19  	"math/bits"
    20  	"path"
    21  	"strings"
    22  
    23  	specs "github.com/opencontainers/runtime-spec/specs-go"
    24  	"golang.org/x/sys/unix"
    25  	"github.com/metacubex/gvisor/pkg/log"
    26  )
    27  
// mapping describes how one mount option relates to mount(2) flag bits.
type mapping struct {
	// set is true when the option asserts the bits in val, and false when
	// the option is the negated form of a flag (e.g. "rw" versus "ro").
	set bool
	// val holds the mount flag bits the option refers to.
	val uint32
}
    32  
    33  // optionsMap maps mount propagation-related OCI filesystem options to mount(2)
    34  // syscall flags.
    35  var optionsMap = map[string]mapping{
    36  	"acl":           {set: true, val: unix.MS_POSIXACL},
    37  	"async":         {set: false, val: unix.MS_SYNCHRONOUS},
    38  	"atime":         {set: false, val: unix.MS_NOATIME},
    39  	"bind":          {set: true, val: unix.MS_BIND},
    40  	"defaults":      {set: true, val: 0},
    41  	"dev":           {set: false, val: unix.MS_NODEV},
    42  	"diratime":      {set: false, val: unix.MS_NODIRATIME},
    43  	"dirsync":       {set: true, val: unix.MS_DIRSYNC},
    44  	"exec":          {set: false, val: unix.MS_NOEXEC},
    45  	"noexec":        {set: true, val: unix.MS_NOEXEC},
    46  	"iversion":      {set: true, val: unix.MS_I_VERSION},
    47  	"loud":          {set: false, val: unix.MS_SILENT},
    48  	"mand":          {set: true, val: unix.MS_MANDLOCK},
    49  	"noacl":         {set: false, val: unix.MS_POSIXACL},
    50  	"noatime":       {set: true, val: unix.MS_NOATIME},
    51  	"nodev":         {set: true, val: unix.MS_NODEV},
    52  	"nodiratime":    {set: true, val: unix.MS_NODIRATIME},
    53  	"noiversion":    {set: false, val: unix.MS_I_VERSION},
    54  	"nomand":        {set: false, val: unix.MS_MANDLOCK},
    55  	"norelatime":    {set: false, val: unix.MS_RELATIME},
    56  	"nostrictatime": {set: false, val: unix.MS_STRICTATIME},
    57  	"nosuid":        {set: true, val: unix.MS_NOSUID},
    58  	"rbind":         {set: true, val: unix.MS_BIND | unix.MS_REC},
    59  	"relatime":      {set: true, val: unix.MS_RELATIME},
    60  	"remount":       {set: true, val: unix.MS_REMOUNT},
    61  	"ro":            {set: true, val: unix.MS_RDONLY},
    62  	"rw":            {set: false, val: unix.MS_RDONLY},
    63  	"silent":        {set: true, val: unix.MS_SILENT},
    64  	"strictatime":   {set: true, val: unix.MS_STRICTATIME},
    65  	"suid":          {set: false, val: unix.MS_NOSUID},
    66  	"sync":          {set: true, val: unix.MS_SYNCHRONOUS},
    67  }
    68  
    69  // propOptionsMap is similar to optionsMap, but it lists propagation options
    70  // that cannot be used together with other flags.
    71  var propOptionsMap = map[string]mapping{
    72  	"private":     {set: true, val: unix.MS_PRIVATE},
    73  	"rprivate":    {set: true, val: unix.MS_PRIVATE | unix.MS_REC},
    74  	"slave":       {set: true, val: unix.MS_SLAVE},
    75  	"rslave":      {set: true, val: unix.MS_SLAVE | unix.MS_REC},
    76  	"unbindable":  {set: true, val: unix.MS_UNBINDABLE},
    77  	"runbindable": {set: true, val: unix.MS_UNBINDABLE | unix.MS_REC},
    78  }
    79  
    80  // invalidOptions list options not allowed.
    81  //   - shared: sandbox must be isolated from the host. Propagating mount changes
    82  //     from the sandbox to the host breaks the isolation. The sandbox's mount
    83  //     table is maintained in sentry memory. Mount operations from the application
    84  //     are not propagated to the host.
    85  var invalidOptions = []string{"shared", "rshared"}
    86  
    87  // OptionsToFlags converts mount options to syscall flags.
    88  func OptionsToFlags(opts []string) uint32 {
    89  	return optionsToFlags(opts, optionsMap)
    90  }
    91  
    92  // PropOptionsToFlags converts propagation mount options to syscall flags.
    93  // Propagation options cannot be set other with other options and must be
    94  // handled separately.
    95  func PropOptionsToFlags(opts []string) uint32 {
    96  	return optionsToFlags(opts, propOptionsMap)
    97  }
    98  
    99  func optionsToFlags(opts []string, source map[string]mapping) uint32 {
   100  	var rv uint32
   101  	for _, opt := range opts {
   102  		if m, ok := source[opt]; ok {
   103  			if m.set {
   104  				rv |= m.val
   105  			} else {
   106  				rv ^= m.val
   107  			}
   108  		}
   109  	}
   110  	return rv
   111  }
   112  
   113  // IsReadonlyMount returns true if the mount options has read only option.
   114  func IsReadonlyMount(opts []string) bool {
   115  	for _, o := range opts {
   116  		if o == "ro" {
   117  			return true
   118  		}
   119  	}
   120  	return false
   121  }
   122  
   123  // validateMount validates that spec mounts are correct.
   124  func validateMount(mnt *specs.Mount) error {
   125  	if !path.IsAbs(mnt.Destination) {
   126  		return fmt.Errorf("Mount.Destination must be an absolute path: %v", mnt)
   127  	}
   128  	if mnt.Type == "bind" {
   129  		return ValidateMountOptions(mnt.Options)
   130  	}
   131  	return nil
   132  }
   133  
   134  func moptKey(opt string) string {
   135  	if len(opt) == 0 {
   136  		return opt
   137  	}
   138  	// Guaranteed to have at least one token, since opt is not empty.
   139  	return strings.SplitN(opt, "=", 2)[0]
   140  }
   141  
   142  // FilterMountOptions filters out all invalid mount options.
   143  func FilterMountOptions(opts []string) []string {
   144  	out := make([]string, 0, len(opts))
   145  	for _, o := range opts {
   146  		if err := validateMountOption(o); err == nil {
   147  			out = append(out, o)
   148  		} else {
   149  			log.Warningf("mount option skipped %q: %v", o, err)
   150  		}
   151  	}
   152  	return out
   153  }
   154  
   155  // ValidateMountOptions validates that mount options are correct.
   156  func ValidateMountOptions(opts []string) error {
   157  	for _, o := range opts {
   158  		if err := validateMountOption(o); err != nil {
   159  			return err
   160  		}
   161  	}
   162  	return nil
   163  }
   164  
   165  func validateMountOption(o string) error {
   166  	if ContainsStr(invalidOptions, o) {
   167  		return fmt.Errorf("mount option %q is not supported", o)
   168  	}
   169  	_, ok1 := optionsMap[o]
   170  	_, ok2 := propOptionsMap[o]
   171  	if !ok1 && !ok2 {
   172  		return fmt.Errorf("unknown mount option %q", o)
   173  	}
   174  	return validatePropagation(o)
   175  }
   176  
   177  // ValidateRootfsPropagation validates that rootfs propagation options are
   178  // correct.
   179  func validateRootfsPropagation(opt string) error {
   180  	flags := PropOptionsToFlags([]string{opt})
   181  	if flags&(unix.MS_SLAVE|unix.MS_PRIVATE) == 0 {
   182  		return fmt.Errorf("root mount propagation option must specify private or slave: %q", opt)
   183  	}
   184  	return validatePropagation(opt)
   185  }
   186  
   187  func validatePropagation(opt string) error {
   188  	flags := PropOptionsToFlags([]string{opt})
   189  	exclusive := flags & (unix.MS_SLAVE | unix.MS_PRIVATE | unix.MS_SHARED | unix.MS_UNBINDABLE)
   190  	if bits.OnesCount32(exclusive) > 1 {
   191  		return fmt.Errorf("mount propagation options are mutually exclusive: %q", opt)
   192  	}
   193  	return nil
   194  }