gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/fsimpl/cgroupfs/devices.go (about)

     1  // Copyright 2023 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package cgroupfs
    16  
import (
	"bytes"
	"fmt"
	"sort"
	"strconv"
	"strings"

	"gvisor.dev/gvisor/pkg/context"
	"gvisor.dev/gvisor/pkg/errors/linuxerr"
	"gvisor.dev/gvisor/pkg/hostarch"
	"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
	"gvisor.dev/gvisor/pkg/sentry/kernel"
	"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
	"gvisor.dev/gvisor/pkg/sentry/vfs"
	"gvisor.dev/gvisor/pkg/sync"
	"gvisor.dev/gvisor/pkg/usermem"
)
    33  
// Access bits making up the binary form of a permission. Each constant is a
// distinct single bit, so any combination can be expressed with bitwise OR.
const (
	// canRead is the bit corresponding to the 'r' access character.
	canRead = 1 << iota
	// canWrite is the bit corresponding to the 'w' access character.
	canWrite
	// canMknod is the bit corresponding to the 'm' access character.
	canMknod
)
const (
	// allowedDevices is the name of the control file whose writes are applied
	// as allow rules.
	allowedDevices       = "devices.allow"
	// controlledDevices is the name of the control file that lists the
	// current rules.
	controlledDevices    = "devices.list"
	// deniedDevices is the name of the control file whose writes are applied
	// as deny rules.
	deniedDevices        = "devices.deny"
	// wildcardDeviceNumber is the major/minor value standing for "*", i.e.
	// any device number.
	wildcardDeviceNumber = -1
)
    45  const (
    46  	blockDevice    deviceType = "b"
    47  	charDevice     deviceType = "c"
    48  	wildcardDevice deviceType = "a"
    49  )
    50  
    51  // type denotes a device's type.
    52  type deviceType string
    53  
    54  func (d deviceType) valid() bool {
    55  	switch d {
    56  	case wildcardDevice, charDevice, blockDevice:
    57  		return true
    58  	default:
    59  		return false
    60  	}
    61  }
    62  
// permission represents the accesses granted on a device as a string made of
// the characters 'r' (read), 'w' (write) and 'm' (mknod).
type permission string
    65  
    66  func (p permission) valid() bool {
    67  	for _, c := range p {
    68  		switch c {
    69  		case 'r', 'w', 'm':
    70  			continue
    71  		default:
    72  			return false
    73  		}
    74  	}
    75  	return true
    76  }
    77  
    78  // toBinary converts permission to its binary representation.
    79  func (p permission) toBinary() int {
    80  	var perm int
    81  	for _, c := range p {
    82  		switch c {
    83  		case 'r':
    84  			perm |= canRead
    85  		case 'w':
    86  			perm |= canWrite
    87  		case 'm':
    88  			perm |= canMknod
    89  		}
    90  	}
    91  	return perm
    92  }
    93  
    94  // union returns a permission which unions p and perm.
    95  func (p permission) union(perm permission) permission {
    96  	return fromBinary(p.toBinary() | perm.toBinary())
    97  }
    98  
    99  // difference returns a permission which consists of accesses in p and not in perm.
   100  func (p permission) difference(perm permission) permission {
   101  	return fromBinary(p.toBinary() & ^perm.toBinary())
   102  }
   103  
   104  // fromBinary converts permission to its string representation.
   105  func fromBinary(i int) permission {
   106  	var perm permission
   107  	if i&canRead == canRead {
   108  		perm += "r"
   109  	}
   110  	if i&canWrite == canWrite {
   111  		perm += "w"
   112  	}
   113  	if i&canMknod == canMknod {
   114  		perm += "m"
   115  	}
   116  	return perm
   117  }
   118  
// deviceID identifies the device, or group of devices, that a rule applies
// to. It is used as the key of devicesController.deviceRules.
//
// +stateify savable
type deviceID struct {
	// Device type, when the type is all, the following fields are ignored.
	controllerType deviceType
	// The device's major number, or wildcardDeviceNumber for any major.
	major int64
	// The device's minor number, or wildcardDeviceNumber for any minor.
	minor int64
}
   128  
// devicesController holds the device access rules configured through the
// devices.allow and devices.deny control files and reported through
// devices.list.
//
// +stateify savable
type devicesController struct {
	controllerCommon
	controllerStateless
	controllerNoResource

	// mu protects the fields below.
	mu sync.Mutex `state:"nosave"`

	// defaultAllow records the polarity (allow or deny) of the entries in
	// deviceRules: a write with the same polarity adds to deviceRules, a write
	// with the opposite polarity removes from it.
	defaultAllow bool
	// deviceRules maps each device with an explicit rule to its accesses.
	deviceRules  map[deviceID]permission
}
   142  
// allowedDevicesData backs the devices.allow control file.
//
// +stateify savable
type allowedDevicesData struct {
	// c is the controller that writes are forwarded to.
	c *devicesController
}

// Generate implements vfs.DynamicBytesSource.Generate. The devices.allow shows nothing.
func (d *allowedDevicesData) Generate(ctx context.Context, buf *bytes.Buffer) error {
	return nil
}

// Write implements vfs.WritableDynamicBytesSource.Write.
//
// The written rule is handed to the controller as an allow rule.
func (d *allowedDevicesData) Write(ctx context.Context, _ *vfs.FileDescription, src usermem.IOSequence, offset int64) (int64, error) {
	return d.c.write(ctx, src, offset, true)
}
   157  
// deniedDevicesData backs the devices.deny control file.
//
// +stateify savable
type deniedDevicesData struct {
	// c is the controller that writes are forwarded to.
	c *devicesController
}

// Generate implements vfs.DynamicBytesSource.Generate. The devices.deny shows nothing.
func (d *deniedDevicesData) Generate(ctx context.Context, buf *bytes.Buffer) error {
	return nil
}

// Write implements vfs.WritableDynamicBytesSource.Write.
//
// The written rule is handed to the controller as a deny rule.
func (d *deniedDevicesData) Write(ctx context.Context, _ *vfs.FileDescription, src usermem.IOSequence, offset int64) (int64, error) {
	return d.c.write(ctx, src, offset, false)
}
   172  
// controlledDevicesData backs the read-only devices.list control file.
//
// +stateify savable
type controlledDevicesData struct {
	// c is the controller whose rules are rendered.
	c *devicesController
}

// Generate implements vfs.DynamicBytesSource.Generate.
//
// The corresponding devices.list shows devices for which access control is set.
func (d *controlledDevicesData) Generate(ctx context.Context, buf *bytes.Buffer) error {
	return d.c.generate(ctx, buf)
}
   184  
   185  func (c *devicesController) addRule(id deviceID, newPermission permission) error {
   186  	existingPermission := c.deviceRules[id]
   187  	c.deviceRules[id] = existingPermission.union(newPermission)
   188  	return nil
   189  }
   190  
   191  func (c *devicesController) removeRule(id deviceID, p permission) error {
   192  	// cgroupv1 ignores silently requests to remove a partially-matching wildcard rule,
   193  	// which are {majorDevice:wildcardDevice}, {wildcardDevice:minorDevice}, and {wildcardDevice:wildcardDevice}
   194  	for _, wildcardDeviceID := range []deviceID{
   195  		{controllerType: id.controllerType, major: id.major, minor: wildcardDeviceNumber},
   196  		{controllerType: id.controllerType, major: wildcardDeviceNumber, minor: id.minor},
   197  		{controllerType: id.controllerType, major: wildcardDeviceNumber, minor: wildcardDeviceNumber},
   198  	} {
   199  		// If there is a exact match, the permission needs to be updated.
   200  		if id == wildcardDeviceID {
   201  			continue
   202  		}
   203  		if _, exist := c.deviceRules[wildcardDeviceID]; exist {
   204  			return nil
   205  		}
   206  	}
   207  	if existingPermission, exist := c.deviceRules[id]; exist {
   208  		if newPermission := existingPermission.difference(p); len(newPermission) == 0 {
   209  			delete(c.deviceRules, id)
   210  		} else {
   211  			c.deviceRules[id] = newPermission
   212  		}
   213  	}
   214  	return nil
   215  }
   216  
   217  func (c *devicesController) applyRule(id deviceID, p permission, allow bool) error {
   218  	if !id.controllerType.valid() {
   219  		return linuxerr.EINVAL
   220  	}
   221  	// If the device type is all, it will reset the rules for all.
   222  	if id.controllerType == wildcardDevice {
   223  		c.defaultAllow = allow
   224  		clear(c.deviceRules)
   225  		return nil
   226  	}
   227  	if !p.valid() {
   228  		return linuxerr.EINVAL
   229  	}
   230  	if len(c.deviceRules) == 0 {
   231  		c.defaultAllow = allow
   232  		clear(c.deviceRules)
   233  	}
   234  	if allow == c.defaultAllow {
   235  		return c.addRule(id, p)
   236  	}
   237  	return c.removeRule(id, p)
   238  }
   239  
   240  func (c *devicesController) generate(ctx context.Context, buf *bytes.Buffer) error {
   241  	c.mu.Lock()
   242  	defer c.mu.Unlock()
   243  	switch {
   244  	case c.defaultAllow && len(c.deviceRules) > 0:
   245  		for id, p := range c.deviceRules {
   246  			buf.WriteString(deviceRuleString(id, p))
   247  			// It lists one rule per line.
   248  			buf.WriteRune('\n')
   249  		}
   250  	case c.defaultAllow && len(c.deviceRules) == 0:
   251  		buf.WriteString(deviceRuleString(deviceID{controllerType: wildcardDevice, major: wildcardDeviceNumber, minor: wildcardDeviceNumber}, "rwm"))
   252  	case !c.defaultAllow && len(c.deviceRules) == 0:
   253  		buf.WriteString("")
   254  	default:
   255  		// When allow-all rule presents at devices.list, it actually indicates that
   256  		// the cgroup is in black-list mode.
   257  		buf.WriteString(deviceRuleString(deviceID{controllerType: wildcardDevice, major: wildcardDeviceNumber, minor: wildcardDeviceNumber}, "rwm"))
   258  	}
   259  	return nil
   260  }
   261  
   262  func (c *devicesController) write(ctx context.Context, src usermem.IOSequence, offset int64, allow bool) (int64, error) {
   263  	c.mu.Lock()
   264  	defer c.mu.Unlock()
   265  	if src.NumBytes() > hostarch.PageSize {
   266  		return 0, linuxerr.EINVAL
   267  	}
   268  	buf := copyScratchBufferFromContext(ctx, hostarch.PageSize)
   269  	n, err := src.CopyIn(ctx, buf)
   270  	if err != nil {
   271  		return 0, err
   272  	}
   273  	rule := string(buf[:n])
   274  	fields := strings.FieldsFunc(rule, func(r rune) bool {
   275  		return r == ' ' || r == ':'
   276  	})
   277  	switch {
   278  	case len(fields) != 1 && len(fields) != 4:
   279  		return 0, linuxerr.EINVAL
   280  	case len(fields) == 4:
   281  		controllerType := deviceType(fields[0])
   282  		perm := permission(fields[3])
   283  		if i := strings.IndexFunc(fields[3], func(r rune) bool { return r == '\n' }); i != -1 {
   284  			perm = perm[:i]
   285  		}
   286  		if len(perm) > 3 {
   287  			perm = perm[:3]
   288  		}
   289  		majorDevice, err := toDeviceNumber(fields[1])
   290  		if err != nil {
   291  			return 0, err
   292  		}
   293  		minorDevice, err := toDeviceNumber(fields[2])
   294  		if err != nil {
   295  			return 0, err
   296  		}
   297  		id := deviceID{
   298  			controllerType: controllerType,
   299  			major:          majorDevice,
   300  			minor:          minorDevice,
   301  		}
   302  		if err := c.applyRule(id, perm, allow); err != nil {
   303  			return 0, err
   304  		}
   305  	case len(fields) == 1:
   306  		if deviceType(fields[0]) != wildcardDevice {
   307  			return 0, linuxerr.EINVAL
   308  		}
   309  		if err := c.applyRule(deviceID{controllerType: wildcardDevice}, permission(""), allow); err != nil {
   310  			return 0, err
   311  		}
   312  	}
   313  	return int64(n), nil
   314  }
   315  
// Compile-time check that *devicesController implements controller.
var _ controller = (*devicesController)(nil)
   317  
   318  func newDevicesController(fs *filesystem) *devicesController {
   319  	// The root device cgroup starts with rwm to all.
   320  	c := &devicesController{
   321  		defaultAllow: true,
   322  		deviceRules:  make(map[deviceID]permission),
   323  	}
   324  	c.controllerCommon.init(kernel.CgroupControllerDevices, fs)
   325  	return c
   326  }
   327  
   328  // Clone implements controller.Clone.
   329  func (c *devicesController) Clone() controller {
   330  	c.mu.Lock()
   331  	defer c.mu.Unlock()
   332  	newRules := make(map[deviceID]permission)
   333  	for id, p := range c.deviceRules {
   334  		newRules[id] = p
   335  	}
   336  	new := &devicesController{
   337  		defaultAllow: c.defaultAllow,
   338  		deviceRules:  newRules,
   339  	}
   340  	new.controllerCommon.cloneFromParent(c)
   341  	return new
   342  }
   343  
// AddControlFiles implements controller.AddControlFiles.
//
// It adds devices.allow and devices.deny as writable files and devices.list
// as a non-writable file to contents, all backed by c.
func (c *devicesController) AddControlFiles(ctx context.Context, creds *auth.Credentials, _ *cgroupInode, contents map[string]kernfs.Inode) {
	contents[allowedDevices] = c.fs.newControllerWritableFile(ctx, creds, &allowedDevicesData{c: c}, true)
	contents[deniedDevices] = c.fs.newControllerWritableFile(ctx, creds, &deniedDevicesData{c: c}, true)
	contents[controlledDevices] = c.fs.newControllerFile(ctx, creds, &controlledDevicesData{c: c}, true)
}
   350  
   351  func deviceRuleString(id deviceID, p permission) string {
   352  	return fmt.Sprintf("%s %s:%s %s", id.controllerType, deviceNumber(id.major), deviceNumber(id.minor), p)
   353  }
   354  
   355  // deviceNumber converts a device number to string.
   356  func deviceNumber(number int64) string {
   357  	if number == wildcardDeviceNumber {
   358  		return "*"
   359  	}
   360  	return fmt.Sprint(number)
   361  }
   362  
   363  func toDeviceNumber(s string) (int64, error) {
   364  	if s == "*" {
   365  		return wildcardDeviceNumber, nil
   366  	}
   367  	val, err := strconv.ParseInt(s, 10, 64)
   368  	if err != nil {
   369  		return 0, linuxerr.EINVAL
   370  	}
   371  	return val, nil
   372  }