github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/fsimpl/cgroupfs/devices.go (about) 1 // Copyright 2023 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package cgroupfs 16 17 import ( 18 "bytes" 19 "fmt" 20 "strconv" 21 "strings" 22 23 "github.com/metacubex/gvisor/pkg/context" 24 "github.com/metacubex/gvisor/pkg/errors/linuxerr" 25 "github.com/metacubex/gvisor/pkg/hostarch" 26 "github.com/metacubex/gvisor/pkg/sentry/fsimpl/kernfs" 27 "github.com/metacubex/gvisor/pkg/sentry/kernel" 28 "github.com/metacubex/gvisor/pkg/sentry/kernel/auth" 29 "github.com/metacubex/gvisor/pkg/sentry/vfs" 30 "github.com/metacubex/gvisor/pkg/sync" 31 "github.com/metacubex/gvisor/pkg/usermem" 32 ) 33 34 const ( 35 canRead = 1 << iota 36 canWrite 37 canMknod 38 ) 39 const ( 40 allowedDevices = "devices.allow" 41 controlledDevices = "devices.list" 42 deniedDevices = "devices.deny" 43 wildcardDeviceNumber = -1 44 ) 45 const ( 46 blockDevice deviceType = "b" 47 charDevice deviceType = "c" 48 wildcardDevice deviceType = "a" 49 ) 50 51 // type denotes a device's type. 52 type deviceType string 53 54 func (d deviceType) valid() bool { 55 switch d { 56 case wildcardDevice, charDevice, blockDevice: 57 return true 58 default: 59 return false 60 } 61 } 62 63 // permission represents a device access, read, write, and mknod. 
64 type permission string 65 66 func (p permission) valid() bool { 67 for _, c := range p { 68 switch c { 69 case 'r', 'w', 'm': 70 continue 71 default: 72 return false 73 } 74 } 75 return true 76 } 77 78 // toBinary converts permission to its binary representation. 79 func (p permission) toBinary() int { 80 var perm int 81 for _, c := range p { 82 switch c { 83 case 'r': 84 perm |= canRead 85 case 'w': 86 perm |= canWrite 87 case 'm': 88 perm |= canMknod 89 } 90 } 91 return perm 92 } 93 94 // union returns a permission which unions p and perm. 95 func (p permission) union(perm permission) permission { 96 return fromBinary(p.toBinary() | perm.toBinary()) 97 } 98 99 // difference returns a permission which consists of accesses in p and not in perm. 100 func (p permission) difference(perm permission) permission { 101 return fromBinary(p.toBinary() & ^perm.toBinary()) 102 } 103 104 // fromBinary converts permission to its string representation. 105 func fromBinary(i int) permission { 106 var perm permission 107 if i&canRead == canRead { 108 perm += "r" 109 } 110 if i&canWrite == canWrite { 111 perm += "w" 112 } 113 if i&canMknod == canMknod { 114 perm += "m" 115 } 116 return perm 117 } 118 119 // +stateify savable 120 type deviceID struct { 121 // Device type, when the type is all, the following fields are ignored. 122 controllerType deviceType 123 // The device's major number. 124 major int64 125 // The device's minor number. 126 minor int64 127 } 128 129 // +stateify savable 130 type devicesController struct { 131 controllerCommon 132 controllerStateless 133 controllerNoResource 134 135 // mu protects the fields below. 136 mu sync.Mutex `state:"nosave"` 137 138 // Allow or deny the device rules below. 139 defaultAllow bool 140 deviceRules map[deviceID]permission 141 } 142 143 // +stateify savable 144 type allowedDevicesData struct { 145 c *devicesController 146 } 147 148 // Generate implements vfs.DynamicBytesSource.Generate. The devices.allow shows nothing. 
func (d *allowedDevicesData) Generate(ctx context.Context, buf *bytes.Buffer) error {
	// Nothing is written to buf, so reading the file yields no content.
	return nil
}

// Write implements vfs.WritableDynamicBytesSource.Write.
//
// The written rule is handed to the controller with allow set to true.
func (d *allowedDevicesData) Write(ctx context.Context, _ *vfs.FileDescription, src usermem.IOSequence, offset int64) (int64, error) {
	return d.c.write(ctx, src, offset, true)
}

// deniedDevicesData is the data source backing the devices.deny file.
//
// +stateify savable
type deniedDevicesData struct {
	c *devicesController
}

// Generate implements vfs.DynamicBytesSource.Generate. The devices.deny shows nothing.
func (d *deniedDevicesData) Generate(ctx context.Context, buf *bytes.Buffer) error {
	// Nothing is written to buf, so reading the file yields no content.
	return nil
}

// Write implements vfs.WritableDynamicBytesSource.Write.
//
// The written rule is handed to the controller with allow set to false.
func (d *deniedDevicesData) Write(ctx context.Context, _ *vfs.FileDescription, src usermem.IOSequence, offset int64) (int64, error) {
	return d.c.write(ctx, src, offset, false)
}

// controlledDevicesData is the data source backing the devices.list file.
//
// +stateify savable
type controlledDevicesData struct {
	c *devicesController
}

// Generate implements vfs.DynamicBytesSource.Generate.
//
// The corresponding devices.list shows devices for which access control is set.
181 func (d *controlledDevicesData) Generate(ctx context.Context, buf *bytes.Buffer) error { 182 return d.c.generate(ctx, buf) 183 } 184 185 func (c *devicesController) addRule(id deviceID, newPermission permission) error { 186 existingPermission := c.deviceRules[id] 187 c.deviceRules[id] = existingPermission.union(newPermission) 188 return nil 189 } 190 191 func (c *devicesController) removeRule(id deviceID, p permission) error { 192 // cgroupv1 ignores silently requests to remove a partially-matching wildcard rule, 193 // which are {majorDevice:wildcardDevice}, {wildcardDevice:minorDevice}, and {wildcardDevice:wildcardDevice} 194 for _, wildcardDeviceID := range []deviceID{ 195 {controllerType: id.controllerType, major: id.major, minor: wildcardDeviceNumber}, 196 {controllerType: id.controllerType, major: wildcardDeviceNumber, minor: id.minor}, 197 {controllerType: id.controllerType, major: wildcardDeviceNumber, minor: wildcardDeviceNumber}, 198 } { 199 // If there is a exact match, the permission needs to be updated. 200 if id == wildcardDeviceID { 201 continue 202 } 203 if _, exist := c.deviceRules[wildcardDeviceID]; exist { 204 return nil 205 } 206 } 207 if existingPermission, exist := c.deviceRules[id]; exist { 208 if newPermission := existingPermission.difference(p); len(newPermission) == 0 { 209 delete(c.deviceRules, id) 210 } else { 211 c.deviceRules[id] = newPermission 212 } 213 } 214 return nil 215 } 216 217 func (c *devicesController) applyRule(id deviceID, p permission, allow bool) error { 218 if !id.controllerType.valid() { 219 return linuxerr.EINVAL 220 } 221 // If the device type is all, it will reset the rules for all. 
222 if id.controllerType == wildcardDevice { 223 c.defaultAllow = allow 224 c.deviceRules = make(map[deviceID]permission) 225 return nil 226 } 227 if !p.valid() { 228 return linuxerr.EINVAL 229 } 230 if len(c.deviceRules) == 0 { 231 c.defaultAllow = allow 232 c.deviceRules = make(map[deviceID]permission) 233 } 234 if allow == c.defaultAllow { 235 return c.addRule(id, p) 236 } 237 return c.removeRule(id, p) 238 } 239 240 func (c *devicesController) generate(ctx context.Context, buf *bytes.Buffer) error { 241 c.mu.Lock() 242 defer c.mu.Unlock() 243 switch { 244 case c.defaultAllow && len(c.deviceRules) > 0: 245 for id, p := range c.deviceRules { 246 buf.WriteString(deviceRuleString(id, p)) 247 // It lists one rule per line. 248 buf.WriteRune('\n') 249 } 250 case c.defaultAllow && len(c.deviceRules) == 0: 251 buf.WriteString(deviceRuleString(deviceID{controllerType: wildcardDevice, major: wildcardDeviceNumber, minor: wildcardDeviceNumber}, "rwm")) 252 case !c.defaultAllow && len(c.deviceRules) == 0: 253 buf.WriteString("") 254 default: 255 // When allow-all rule presents at devices.list, it actually indicates that 256 // the cgroup is in black-list mode. 
257 buf.WriteString(deviceRuleString(deviceID{controllerType: wildcardDevice, major: wildcardDeviceNumber, minor: wildcardDeviceNumber}, "rwm")) 258 } 259 return nil 260 } 261 262 func (c *devicesController) write(ctx context.Context, src usermem.IOSequence, offset int64, allow bool) (int64, error) { 263 c.mu.Lock() 264 defer c.mu.Unlock() 265 if src.NumBytes() > hostarch.PageSize { 266 return 0, linuxerr.EINVAL 267 } 268 buf := copyScratchBufferFromContext(ctx, hostarch.PageSize) 269 n, err := src.CopyIn(ctx, buf) 270 if err != nil { 271 return 0, err 272 } 273 rule := string(buf[:n]) 274 fields := strings.FieldsFunc(rule, func(r rune) bool { 275 return r == ' ' || r == ':' 276 }) 277 switch { 278 case len(fields) != 1 && len(fields) != 4: 279 return 0, linuxerr.EINVAL 280 case len(fields) == 4: 281 controllerType := deviceType(fields[0]) 282 perm := permission(fields[3]) 283 if i := strings.IndexFunc(fields[3], func(r rune) bool { return r == '\n' }); i != -1 { 284 perm = perm[:i] 285 } 286 if len(perm) > 3 { 287 perm = perm[:3] 288 } 289 majorDevice, err := toDeviceNumber(fields[1]) 290 if err != nil { 291 return 0, err 292 } 293 minorDevice, err := toDeviceNumber(fields[2]) 294 if err != nil { 295 return 0, err 296 } 297 id := deviceID{ 298 controllerType: controllerType, 299 major: majorDevice, 300 minor: minorDevice, 301 } 302 if err := c.applyRule(id, perm, allow); err != nil { 303 return 0, err 304 } 305 case len(fields) == 1: 306 if deviceType(fields[0]) != wildcardDevice { 307 return 0, linuxerr.EINVAL 308 } 309 if err := c.applyRule(deviceID{controllerType: wildcardDevice}, permission(""), allow); err != nil { 310 return 0, err 311 } 312 } 313 return int64(n), nil 314 } 315 316 var _ controller = (*devicesController)(nil) 317 318 func newDevicesController(fs *filesystem) *devicesController { 319 // The root device cgroup starts with rwm to all. 
320 c := &devicesController{ 321 defaultAllow: true, 322 deviceRules: make(map[deviceID]permission), 323 } 324 c.controllerCommon.init(kernel.CgroupControllerDevices, fs) 325 return c 326 } 327 328 // Clone implements controller.Clone. 329 func (c *devicesController) Clone() controller { 330 c.mu.Lock() 331 defer c.mu.Unlock() 332 newRules := make(map[deviceID]permission) 333 for id, p := range c.deviceRules { 334 newRules[id] = p 335 } 336 new := &devicesController{ 337 defaultAllow: c.defaultAllow, 338 deviceRules: newRules, 339 } 340 new.controllerCommon.cloneFromParent(c) 341 return new 342 } 343 344 // AddControlFiles implements controller.AddControlFiles. 345 func (c *devicesController) AddControlFiles(ctx context.Context, creds *auth.Credentials, _ *cgroupInode, contents map[string]kernfs.Inode) { 346 contents[allowedDevices] = c.fs.newControllerWritableFile(ctx, creds, &allowedDevicesData{c: c}, true) 347 contents[deniedDevices] = c.fs.newControllerWritableFile(ctx, creds, &deniedDevicesData{c: c}, true) 348 contents[controlledDevices] = c.fs.newControllerFile(ctx, creds, &controlledDevicesData{c: c}, true) 349 } 350 351 func deviceRuleString(id deviceID, p permission) string { 352 return fmt.Sprintf("%s %s:%s %s", id.controllerType, deviceNumber(id.major), deviceNumber(id.minor), p) 353 } 354 355 // deviceNumber converts a device number to string. 356 func deviceNumber(number int64) string { 357 if number == wildcardDeviceNumber { 358 return "*" 359 } 360 return fmt.Sprint(number) 361 } 362 363 func toDeviceNumber(s string) (int64, error) { 364 if s == "*" { 365 return wildcardDeviceNumber, nil 366 } 367 val, err := strconv.ParseInt(s, 10, 64) 368 if err != nil { 369 return 0, linuxerr.EINVAL 370 } 371 return val, nil 372 }