gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/vfs/permissions.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package vfs 16 17 import ( 18 "math" 19 "strings" 20 21 "gvisor.dev/gvisor/pkg/abi/linux" 22 "gvisor.dev/gvisor/pkg/context" 23 "gvisor.dev/gvisor/pkg/errors/linuxerr" 24 "gvisor.dev/gvisor/pkg/sentry/kernel/auth" 25 "gvisor.dev/gvisor/pkg/sentry/limits" 26 ) 27 28 // AccessTypes is a bitmask of Unix file permissions. 29 // 30 // +stateify savable 31 type AccessTypes uint16 32 33 // Bits in AccessTypes. 34 const ( 35 MayExec AccessTypes = 1 36 MayWrite AccessTypes = 2 37 MayRead AccessTypes = 4 38 ) 39 40 // OnlyRead returns true if access _only_ allows read. 41 func (a AccessTypes) OnlyRead() bool { 42 return a == MayRead 43 } 44 45 // MayRead returns true if access allows read. 46 func (a AccessTypes) MayRead() bool { 47 return a&MayRead != 0 48 } 49 50 // MayWrite returns true if access allows write. 51 func (a AccessTypes) MayWrite() bool { 52 return a&MayWrite != 0 53 } 54 55 // MayExec returns true if access allows exec. 56 func (a AccessTypes) MayExec() bool { 57 return a&MayExec != 0 58 } 59 60 // GenericCheckPermissions checks that creds has the given access rights on a 61 // file with the given permissions, UID, and GID, subject to the rules of 62 // fs/namei.c:generic_permission(). 63 func GenericCheckPermissions(creds *auth.Credentials, ats AccessTypes, mode linux.FileMode, kuid auth.KUID, kgid auth.KGID) error { 64 // Check permission bits. 65 perms := uint16(mode.Permissions()) 66 if creds.EffectiveKUID == kuid { 67 perms >>= 6 68 } else if creds.InGroup(kgid) { 69 perms >>= 3 70 } 71 if uint16(ats)&perms == uint16(ats) { 72 // All permission bits match, access granted. 73 return nil 74 } 75 76 // Caller capabilities require that the file's KUID and KGID are mapped in 77 // the caller's user namespace; compare 78 // kernel/capability.c:privileged_wrt_inode_uidgid(). 79 if !kuid.In(creds.UserNamespace).Ok() || !kgid.In(creds.UserNamespace).Ok() { 80 return linuxerr.EACCES 81 } 82 // CAP_DAC_READ_SEARCH allows the caller to read and search arbitrary 83 // directories, and read arbitrary non-directory files. 84 if (mode.IsDir() && !ats.MayWrite()) || ats.OnlyRead() { 85 if creds.HasCapability(linux.CAP_DAC_READ_SEARCH) { 86 return nil 87 } 88 } 89 // CAP_DAC_OVERRIDE allows arbitrary access to directories, read/write 90 // access to non-directory files, and execute access to non-directory files 91 // for which at least one execute bit is set. 92 if mode.IsDir() || !ats.MayExec() || (mode.Permissions()&0111 != 0) { 93 if creds.HasCapability(linux.CAP_DAC_OVERRIDE) { 94 return nil 95 } 96 } 97 return linuxerr.EACCES 98 } 99 100 // MayLink determines whether creating a hard link to a file with the given 101 // mode, kuid, and kgid is permitted. 102 // 103 // This corresponds to Linux's fs/namei.c:may_linkat. 104 func MayLink(creds *auth.Credentials, mode linux.FileMode, kuid auth.KUID, kgid auth.KGID) error { 105 // Source inode owner can hardlink all they like; otherwise, it must be a 106 // safe source. 107 if CanActAsOwner(creds, kuid) { 108 return nil 109 } 110 111 // Only regular files can be hard linked. 112 if mode.FileType() != linux.S_IFREG { 113 return linuxerr.EPERM 114 } 115 116 // Setuid files should not get pinned to the filesystem. 117 if mode&linux.S_ISUID != 0 { 118 return linuxerr.EPERM 119 } 120 121 // Executable setgid files should not get pinned to the filesystem, but we 122 // don't support S_IXGRP anyway. 123 124 // Hardlinking to unreadable or unwritable sources is dangerous. 125 if err := GenericCheckPermissions(creds, MayRead|MayWrite, mode, kuid, kgid); err != nil { 126 return linuxerr.EPERM 127 } 128 return nil 129 } 130 131 // AccessTypesForOpenFlags returns the access types required to open a file 132 // with the given OpenOptions.Flags. Note that this is NOT the same thing as 133 // the set of accesses permitted for the opened file: 134 // 135 // - O_TRUNC causes MayWrite to be set in the returned AccessTypes (since it 136 // mutates the file), but does not permit writing to the open file description 137 // thereafter. 138 // 139 // - "Linux reserves the special, nonstandard access mode 3 (binary 11) in 140 // flags to mean: check for read and write permission on the file and return a 141 // file descriptor that can't be used for reading or writing." - open(2). Thus 142 // AccessTypesForOpenFlags returns MayRead|MayWrite in this case. 143 // 144 // Use May{Read,Write}FileWithOpenFlags() for these checks instead. 145 func AccessTypesForOpenFlags(opts *OpenOptions) AccessTypes { 146 ats := AccessTypes(0) 147 if opts.FileExec { 148 ats |= MayExec 149 } 150 151 switch opts.Flags & linux.O_ACCMODE { 152 case linux.O_RDONLY: 153 if opts.Flags&linux.O_TRUNC != 0 { 154 return ats | MayRead | MayWrite 155 } 156 return ats | MayRead 157 case linux.O_WRONLY: 158 return ats | MayWrite 159 default: 160 return ats | MayRead | MayWrite 161 } 162 } 163 164 // MayReadFileWithOpenFlags returns true if a file with the given open flags 165 // should be readable. 166 func MayReadFileWithOpenFlags(flags uint32) bool { 167 switch flags & linux.O_ACCMODE { 168 case linux.O_RDONLY, linux.O_RDWR: 169 return true 170 default: 171 return false 172 } 173 } 174 175 // MayWriteFileWithOpenFlags returns true if a file with the given open flags 176 // should be writable. 177 func MayWriteFileWithOpenFlags(flags uint32) bool { 178 switch flags & linux.O_ACCMODE { 179 case linux.O_WRONLY, linux.O_RDWR: 180 return true 181 default: 182 return false 183 } 184 } 185 186 // CheckSetStat checks that creds has permission to change the metadata of a 187 // file with the given permissions, UID, and GID as specified by stat, subject 188 // to the rules of Linux's fs/attr.c:setattr_prepare(). 189 func CheckSetStat(ctx context.Context, creds *auth.Credentials, opts *SetStatOptions, mode linux.FileMode, kuid auth.KUID, kgid auth.KGID) error { 190 stat := &opts.Stat 191 if stat.Mask&linux.STATX_SIZE != 0 { 192 limit, err := CheckLimit(ctx, 0, int64(stat.Size)) 193 if err != nil { 194 return err 195 } 196 if limit < int64(stat.Size) { 197 return linuxerr.ErrExceedsFileSizeLimit 198 } 199 } 200 if stat.Mask&linux.STATX_MODE != 0 { 201 if !CanActAsOwner(creds, kuid) { 202 return linuxerr.EPERM 203 } 204 // TODO(b/30815691): "If the calling process is not privileged (Linux: 205 // does not have the CAP_FSETID capability), and the group of the file 206 // does not match the effective group ID of the process or one of its 207 // supplementary group IDs, the S_ISGID bit will be turned off, but 208 // this will not cause an error to be returned." - chmod(2) 209 } 210 if stat.Mask&linux.STATX_UID != 0 { 211 if !((creds.EffectiveKUID == kuid && auth.KUID(stat.UID) == kuid) || 212 HasCapabilityOnFile(creds, linux.CAP_CHOWN, kuid, kgid)) { 213 return linuxerr.EPERM 214 } 215 } 216 if stat.Mask&linux.STATX_GID != 0 { 217 if !((creds.EffectiveKUID == kuid && creds.InGroup(auth.KGID(stat.GID))) || 218 HasCapabilityOnFile(creds, linux.CAP_CHOWN, kuid, kgid)) { 219 return linuxerr.EPERM 220 } 221 } 222 if opts.NeedWritePerm && !creds.HasCapability(linux.CAP_DAC_OVERRIDE) { 223 if err := GenericCheckPermissions(creds, MayWrite, mode, kuid, kgid); err != nil { 224 return err 225 } 226 } 227 if stat.Mask&(linux.STATX_ATIME|linux.STATX_MTIME|linux.STATX_CTIME) != 0 { 228 if !CanActAsOwner(creds, kuid) { 229 if (stat.Mask&linux.STATX_ATIME != 0 && stat.Atime.Nsec != linux.UTIME_NOW) || 230 (stat.Mask&linux.STATX_MTIME != 0 && stat.Mtime.Nsec != linux.UTIME_NOW) || 231 (stat.Mask&linux.STATX_CTIME != 0 && stat.Ctime.Nsec != linux.UTIME_NOW) { 232 return linuxerr.EPERM 233 } 234 if err := GenericCheckPermissions(creds, MayWrite, mode, kuid, kgid); err != nil { 235 return err 236 } 237 } 238 } 239 return nil 240 } 241 242 // CheckDeleteSticky checks whether the sticky bit is set on a directory with 243 // the given file mode, and if so, checks whether creds has permission to 244 // remove a file owned by childKUID from a directory with the given mode. 245 // CheckDeleteSticky is consistent with fs/linux.h:check_sticky(). 246 func CheckDeleteSticky(creds *auth.Credentials, parentMode linux.FileMode, parentKUID auth.KUID, childKUID auth.KUID, childKGID auth.KGID) error { 247 if parentMode&linux.ModeSticky == 0 { 248 return nil 249 } 250 if creds.EffectiveKUID == childKUID || 251 creds.EffectiveKUID == parentKUID || 252 HasCapabilityOnFile(creds, linux.CAP_FOWNER, childKUID, childKGID) { 253 return nil 254 } 255 return linuxerr.EPERM 256 } 257 258 // CanActAsOwner returns true if creds can act as the owner of a file with the 259 // given owning UID, consistent with Linux's 260 // fs/inode.c:inode_owner_or_capable(). 261 func CanActAsOwner(creds *auth.Credentials, kuid auth.KUID) bool { 262 if creds.EffectiveKUID == kuid { 263 return true 264 } 265 return creds.HasCapability(linux.CAP_FOWNER) && creds.UserNamespace.MapFromKUID(kuid).Ok() 266 } 267 268 // HasCapabilityOnFile returns true if creds has the given capability with 269 // respect to a file with the given owning UID and GID, consistent with Linux's 270 // kernel/capability.c:capable_wrt_inode_uidgid(). 271 func HasCapabilityOnFile(creds *auth.Credentials, cp linux.Capability, kuid auth.KUID, kgid auth.KGID) bool { 272 return creds.HasCapability(cp) && creds.UserNamespace.MapFromKUID(kuid).Ok() && creds.UserNamespace.MapFromKGID(kgid).Ok() 273 } 274 275 // CheckLimit enforces file size rlimits. It returns error if the write 276 // operation must not proceed. Otherwise it returns the max length allowed to 277 // without violating the limit. 278 func CheckLimit(ctx context.Context, offset, size int64) (int64, error) { 279 fileSizeLimit := limits.FromContextOrDie(ctx).Get(limits.FileSize).Cur 280 if fileSizeLimit > math.MaxInt64 { 281 return size, nil 282 } 283 if offset >= int64(fileSizeLimit) { 284 return 0, linuxerr.ErrExceedsFileSizeLimit 285 } 286 remaining := int64(fileSizeLimit) - offset 287 if remaining < size { 288 return remaining, nil 289 } 290 return size, nil 291 } 292 293 // CheckXattrPermissions checks permissions for extended attribute access. 294 // This is analogous to fs/xattr.c:xattr_permission(). Some key differences: 295 // - Does not check for read-only filesystem property. 296 // - Does not check inode immutability or append only mode. In both cases EPERM 297 // must be returned by filesystem implementations. 298 // - Does not do inode permission checks. Filesystem implementations should 299 // handle inode permission checks as they may differ across implementations. 300 func CheckXattrPermissions(creds *auth.Credentials, ats AccessTypes, mode linux.FileMode, kuid auth.KUID, name string) error { 301 switch { 302 case strings.HasPrefix(name, linux.XATTR_TRUSTED_PREFIX): 303 // The trusted.* namespace can only be accessed by privileged 304 // users. 305 if creds.HasCapability(linux.CAP_SYS_ADMIN) { 306 return nil 307 } 308 if ats.MayWrite() { 309 return linuxerr.EPERM 310 } 311 return linuxerr.ENODATA 312 case strings.HasPrefix(name, linux.XATTR_USER_PREFIX): 313 // In the user.* namespace, only regular files and directories can have 314 // extended attributes. For sticky directories, only the owner and 315 // privileged users can write attributes. 316 filetype := mode.FileType() 317 if filetype != linux.ModeRegular && filetype != linux.ModeDirectory { 318 if ats.MayWrite() { 319 return linuxerr.EPERM 320 } 321 return linuxerr.ENODATA 322 } 323 if filetype == linux.ModeDirectory && mode&linux.ModeSticky != 0 && ats.MayWrite() && !CanActAsOwner(creds, kuid) { 324 return linuxerr.EPERM 325 } 326 case strings.HasPrefix(name, linux.XATTR_SECURITY_PREFIX): 327 if ats.MayRead() { 328 return nil 329 } 330 return linuxerr.EOPNOTSUPP 331 } 332 return nil 333 } 334 335 // ClearSUIDAndSGID clears the setuid and/or setgid bits after a chown or write. 336 // Depending on the mode, neither bit, only the setuid bit, or both are cleared. 337 func ClearSUIDAndSGID(mode uint32) uint32 { 338 // Directories don't have their bits changed. 339 if mode&linux.ModeDirectory == linux.ModeDirectory { 340 return mode 341 } 342 343 // Changing owners always disables the setuid bit. It disables 344 // the setgid bit when the file is executable. 345 mode &= ^uint32(linux.ModeSetUID) 346 if sgid := uint32(linux.ModeSetGID | linux.ModeGroupExec); mode&sgid == sgid { 347 mode &= ^uint32(linux.ModeSetGID) 348 } 349 return mode 350 }