github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/fsimpl/overlay/regular_file.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package overlay 16 17 import ( 18 "github.com/metacubex/gvisor/pkg/abi/linux" 19 "github.com/metacubex/gvisor/pkg/context" 20 "github.com/metacubex/gvisor/pkg/errors/linuxerr" 21 "github.com/metacubex/gvisor/pkg/hostarch" 22 "github.com/metacubex/gvisor/pkg/log" 23 "github.com/metacubex/gvisor/pkg/sentry/arch" 24 "github.com/metacubex/gvisor/pkg/sentry/kernel/auth" 25 "github.com/metacubex/gvisor/pkg/sentry/memmap" 26 "github.com/metacubex/gvisor/pkg/sentry/vfs" 27 "github.com/metacubex/gvisor/pkg/usermem" 28 "github.com/metacubex/gvisor/pkg/waiter" 29 ) 30 31 func (d *dentry) isRegularFile() bool { 32 return d.mode.Load()&linux.S_IFMT == linux.S_IFREG 33 } 34 35 func (d *dentry) isSymlink() bool { 36 return d.mode.Load()&linux.S_IFMT == linux.S_IFLNK 37 } 38 39 func (d *dentry) readlink(ctx context.Context) (string, error) { 40 layerVD := d.topLayer() 41 return d.fs.vfsfs.VirtualFilesystem().ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{ 42 Root: layerVD, 43 Start: layerVD, 44 }) 45 } 46 47 // +stateify savable 48 type regularFileFD struct { 49 fileDescription 50 51 // If copiedUp is false, cachedFD represents 52 // fileDescription.dentry().lowerVDs[0]; otherwise, cachedFD represents 53 // fileDescription.dentry().upperVD. cachedFlags is the last known value of 54 // cachedFD.StatusFlags(). copiedUp, cachedFD, and cachedFlags are 55 // protected by mu. 56 mu regularFileFDMutex `state:"nosave"` 57 copiedUp bool 58 cachedFD *vfs.FileDescription 59 cachedFlags uint32 60 } 61 62 func (fd *regularFileFD) getCurrentFD(ctx context.Context) (*vfs.FileDescription, error) { 63 fd.mu.Lock() 64 defer fd.mu.Unlock() 65 wrappedFD, err := fd.currentFDLocked(ctx) 66 if err != nil { 67 return nil, err 68 } 69 wrappedFD.IncRef() 70 return wrappedFD, nil 71 } 72 73 func (fd *regularFileFD) currentFDLocked(ctx context.Context) (*vfs.FileDescription, error) { 74 d := fd.dentry() 75 statusFlags := fd.vfsfd.StatusFlags() 76 if !fd.copiedUp && d.isCopiedUp() { 77 // Switch to the copied-up file. 78 upperVD := d.topLayer() 79 upperFD, err := fd.filesystem().vfsfs.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{ 80 Root: upperVD, 81 Start: upperVD, 82 }, &vfs.OpenOptions{ 83 Flags: statusFlags, 84 }) 85 if err != nil { 86 return nil, err 87 } 88 oldOff, oldOffErr := fd.cachedFD.Seek(ctx, 0, linux.SEEK_CUR) 89 if oldOffErr == nil { 90 if _, err := upperFD.Seek(ctx, oldOff, linux.SEEK_SET); err != nil { 91 upperFD.DecRef(ctx) 92 return nil, err 93 } 94 } 95 fd.cachedFD.DecRef(ctx) 96 fd.copiedUp = true 97 fd.cachedFD = upperFD 98 fd.cachedFlags = statusFlags 99 } else if fd.cachedFlags != statusFlags { 100 if err := fd.cachedFD.SetStatusFlags(ctx, d.fs.creds, statusFlags); err != nil { 101 return nil, err 102 } 103 fd.cachedFlags = statusFlags 104 } 105 return fd.cachedFD, nil 106 } 107 108 // Release implements vfs.FileDescriptionImpl.Release. 109 func (fd *regularFileFD) Release(ctx context.Context) { 110 fd.cachedFD.DecRef(ctx) 111 fd.cachedFD = nil 112 } 113 114 // OnClose implements vfs.FileDescriptionImpl.OnClose. 115 func (fd *regularFileFD) OnClose(ctx context.Context) error { 116 // Linux doesn't define ovl_file_operations.flush at all (i.e. its 117 // equivalent to OnClose is a no-op). We pass through to 118 // fd.cachedFD.OnClose() without upgrading if fd.dentry() has been 119 // copied-up, since OnClose is mostly used to define post-close writeback, 120 // and if fd.cachedFD hasn't been updated then it can't have been used to 121 // mutate fd.dentry() anyway. 122 fd.mu.Lock() 123 if statusFlags := fd.vfsfd.StatusFlags(); fd.cachedFlags != statusFlags { 124 if err := fd.cachedFD.SetStatusFlags(ctx, fd.filesystem().creds, statusFlags); err != nil { 125 fd.mu.Unlock() 126 return err 127 } 128 fd.cachedFlags = statusFlags 129 } 130 wrappedFD := fd.cachedFD 131 fd.mu.Unlock() 132 return wrappedFD.OnClose(ctx) 133 } 134 135 // Stat implements vfs.FileDescriptionImpl.Stat. 136 func (fd *regularFileFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) { 137 var stat linux.Statx 138 if layerMask := opts.Mask &^ statInternalMask; layerMask != 0 { 139 wrappedFD, err := fd.getCurrentFD(ctx) 140 if err != nil { 141 return linux.Statx{}, err 142 } 143 stat, err = wrappedFD.Stat(ctx, vfs.StatOptions{ 144 Mask: layerMask, 145 Sync: opts.Sync, 146 }) 147 wrappedFD.DecRef(ctx) 148 if err != nil { 149 return linux.Statx{}, err 150 } 151 } 152 fd.dentry().statInternalTo(ctx, &opts, &stat) 153 return stat, nil 154 } 155 156 // Allocate implements vfs.FileDescriptionImpl.Allocate. 157 func (fd *regularFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error { 158 wrappedFD, err := fd.getCurrentFD(ctx) 159 if err != nil { 160 return err 161 } 162 defer wrappedFD.DecRef(ctx) 163 return wrappedFD.Allocate(ctx, mode, offset, length) 164 } 165 166 // SetStat implements vfs.FileDescriptionImpl.SetStat. 167 func (fd *regularFileFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error { 168 d := fd.dentry() 169 mode := linux.FileMode(d.mode.Load()) 170 if err := vfs.CheckSetStat(ctx, auth.CredentialsFromContext(ctx), &opts, mode, auth.KUID(d.uid.Load()), auth.KGID(d.gid.Load())); err != nil { 171 return err 172 } 173 mnt := fd.vfsfd.Mount() 174 if err := mnt.CheckBeginWrite(); err != nil { 175 return err 176 } 177 defer mnt.EndWrite() 178 if err := d.copyUpLocked(ctx); err != nil { 179 return err 180 } 181 // Changes to d's attributes are serialized by d.copyMu. 182 d.copyMu.Lock() 183 defer d.copyMu.Unlock() 184 wrappedFD, err := fd.currentFDLocked(ctx) 185 if err != nil { 186 return err 187 } 188 if err := wrappedFD.SetStat(ctx, opts); err != nil { 189 return err 190 } 191 192 // Changing owners or truncating may clear one or both of the setuid and 193 // setgid bits, so we may have to update opts before setting d.mode. 194 if opts.Stat.Mask&(linux.STATX_UID|linux.STATX_GID|linux.STATX_SIZE) != 0 { 195 stat, err := wrappedFD.Stat(ctx, vfs.StatOptions{ 196 Mask: linux.STATX_MODE, 197 }) 198 if err != nil { 199 return err 200 } 201 opts.Stat.Mode = stat.Mode 202 opts.Stat.Mask |= linux.STATX_MODE 203 } 204 205 d.updateAfterSetStatLocked(&opts) 206 return nil 207 } 208 209 // StatFS implements vfs.FileDescriptionImpl.StatFS. 210 func (fd *regularFileFD) StatFS(ctx context.Context) (linux.Statfs, error) { 211 return fd.filesystem().statFS(ctx) 212 } 213 214 // Readiness implements waiter.Waitable.Readiness. 215 func (fd *regularFileFD) Readiness(mask waiter.EventMask) waiter.EventMask { 216 ctx := context.Background() 217 wrappedFD, err := fd.getCurrentFD(ctx) 218 if err != nil { 219 // TODO(b/171089913): Just use fd.cachedFD since Readiness can't return 220 // an error. This is obviously wrong, but at least consistent with 221 // VFS1. 222 log.Warningf("overlay.regularFileFD.Readiness: currentFDLocked failed: %v", err) 223 fd.mu.Lock() 224 wrappedFD = fd.cachedFD 225 wrappedFD.IncRef() 226 fd.mu.Unlock() 227 } 228 defer wrappedFD.DecRef(ctx) 229 return wrappedFD.Readiness(mask) 230 } 231 232 // EventRegister implements waiter.Waitable.EventRegister. 233 func (fd *regularFileFD) EventRegister(e *waiter.Entry) error { 234 fd.mu.Lock() 235 defer fd.mu.Unlock() 236 wrappedFD, err := fd.currentFDLocked(context.Background()) 237 if err != nil { 238 // TODO(b/171089913): Just use fd.cachedFD for backward compatibility 239 // with VFS1. 240 log.Warningf("overlay.regularFileFD.EventRegister: currentFDLocked failed: %v", err) 241 wrappedFD = fd.cachedFD 242 } 243 return wrappedFD.EventRegister(e) 244 } 245 246 // EventUnregister implements waiter.Waitable.EventUnregister. 247 func (fd *regularFileFD) EventUnregister(e *waiter.Entry) { 248 fd.mu.Lock() 249 defer fd.mu.Unlock() 250 fd.cachedFD.EventUnregister(e) 251 } 252 253 // Epollable implements FileDescriptionImpl.Epollable. 254 func (fd *regularFileFD) Epollable() bool { 255 fd.mu.Lock() 256 defer fd.mu.Unlock() 257 wrappedFD, err := fd.currentFDLocked(context.Background()) 258 if err != nil { 259 // TODO(b/171089913): Just use fd.cachedFD since EventRegister can't 260 // return an error. This is obviously wrong, but at least consistent 261 // with VFS1. 262 log.Warningf("overlay.regularFileFD.Epollable: currentFDLocked failed: %v", err) 263 wrappedFD = fd.cachedFD 264 } 265 return wrappedFD.Epollable() 266 } 267 268 // PRead implements vfs.FileDescriptionImpl.PRead. 269 func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) { 270 wrappedFD, err := fd.getCurrentFD(ctx) 271 if err != nil { 272 return 0, err 273 } 274 defer wrappedFD.DecRef(ctx) 275 return wrappedFD.PRead(ctx, dst, offset, opts) 276 } 277 278 // Read implements vfs.FileDescriptionImpl.Read. 279 func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) { 280 // Hold fd.mu during the read to serialize the file offset. 281 fd.mu.Lock() 282 defer fd.mu.Unlock() 283 wrappedFD, err := fd.currentFDLocked(ctx) 284 if err != nil { 285 return 0, err 286 } 287 return wrappedFD.Read(ctx, dst, opts) 288 } 289 290 // PWrite implements vfs.FileDescriptionImpl.PWrite. 291 func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) { 292 wrappedFD, err := fd.getCurrentFD(ctx) 293 if err != nil { 294 return 0, err 295 } 296 defer wrappedFD.DecRef(ctx) 297 n, err := wrappedFD.PWrite(ctx, src, offset, opts) 298 if err != nil { 299 return n, err 300 } 301 return fd.updateSetUserGroupIDs(ctx, wrappedFD, n) 302 } 303 304 // Write implements vfs.FileDescriptionImpl.Write. 305 func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) { 306 // Hold fd.mu during the write to serialize the file offset. 307 fd.mu.Lock() 308 defer fd.mu.Unlock() 309 wrappedFD, err := fd.currentFDLocked(ctx) 310 if err != nil { 311 return 0, err 312 } 313 n, err := wrappedFD.Write(ctx, src, opts) 314 if err != nil { 315 return n, err 316 } 317 return fd.updateSetUserGroupIDs(ctx, wrappedFD, n) 318 } 319 320 func (fd *regularFileFD) updateSetUserGroupIDs(ctx context.Context, wrappedFD *vfs.FileDescription, written int64) (int64, error) { 321 // Writing can clear the setuid and/or setgid bits. We only have to 322 // check this if something was written and one of those bits was set. 323 dentry := fd.dentry() 324 if written == 0 || dentry.mode.Load()&(linux.S_ISUID|linux.S_ISGID) == 0 { 325 return written, nil 326 } 327 stat, err := wrappedFD.Stat(ctx, vfs.StatOptions{Mask: linux.STATX_MODE}) 328 if err != nil { 329 return written, err 330 } 331 dentry.copyMu.Lock() 332 defer dentry.copyMu.Unlock() 333 dentry.mode.Store(uint32(stat.Mode)) 334 return written, nil 335 } 336 337 // Seek implements vfs.FileDescriptionImpl.Seek. 338 func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) { 339 // Hold fd.mu during the seek to serialize the file offset. 340 fd.mu.Lock() 341 defer fd.mu.Unlock() 342 wrappedFD, err := fd.currentFDLocked(ctx) 343 if err != nil { 344 return 0, err 345 } 346 return wrappedFD.Seek(ctx, offset, whence) 347 } 348 349 // Sync implements vfs.FileDescriptionImpl.Sync. 350 func (fd *regularFileFD) Sync(ctx context.Context) error { 351 fd.mu.Lock() 352 if !fd.dentry().isCopiedUp() { 353 fd.mu.Unlock() 354 return nil 355 } 356 wrappedFD, err := fd.currentFDLocked(ctx) 357 if err != nil { 358 fd.mu.Unlock() 359 return err 360 } 361 wrappedFD.IncRef() 362 defer wrappedFD.DecRef(ctx) 363 fd.mu.Unlock() 364 return wrappedFD.Sync(ctx) 365 } 366 367 // Ioctl implements vfs.FileDescriptionImpl.Ioctl. 368 func (fd *regularFileFD) Ioctl(ctx context.Context, uio usermem.IO, sysno uintptr, args arch.SyscallArguments) (uintptr, error) { 369 wrappedFD, err := fd.getCurrentFD(ctx) 370 if err != nil { 371 return 0, err 372 } 373 defer wrappedFD.DecRef(ctx) 374 return wrappedFD.Ioctl(ctx, uio, sysno, args) 375 } 376 377 // ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap. 378 func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error { 379 if err := fd.ensureMappable(ctx, opts); err != nil { 380 return err 381 } 382 return vfs.GenericConfigureMMap(&fd.vfsfd, fd.dentry(), opts) 383 } 384 385 // ensureMappable ensures that fd.dentry().wrappedMappable is not nil. 386 func (fd *regularFileFD) ensureMappable(ctx context.Context, opts *memmap.MMapOpts) error { 387 d := fd.dentry() 388 389 // Fast path if we already have a Mappable for the current top layer. 390 if d.isMappable.Load() != 0 { 391 return nil 392 } 393 394 // Only permit mmap of regular files, since other file types may have 395 // unpredictable behavior when mmapped (e.g. /dev/zero). 396 if d.mode.Load()&linux.S_IFMT != linux.S_IFREG { 397 return linuxerr.ENODEV 398 } 399 400 // Get a Mappable for the current top layer. 401 fd.mu.Lock() 402 defer fd.mu.Unlock() 403 d.copyMu.RLock() 404 defer d.copyMu.RUnlock() 405 if d.isMappable.Load() != 0 { 406 return nil 407 } 408 wrappedFD, err := fd.currentFDLocked(ctx) 409 if err != nil { 410 return err 411 } 412 if err := wrappedFD.ConfigureMMap(ctx, opts); err != nil { 413 return err 414 } 415 if opts.MappingIdentity != nil { 416 opts.MappingIdentity.DecRef(ctx) 417 opts.MappingIdentity = nil 418 } 419 // Use this Mappable for all mappings of this layer (unless we raced with 420 // another call to ensureMappable). 421 d.mapsMu.Lock() 422 defer d.mapsMu.Unlock() 423 d.dataMu.Lock() 424 defer d.dataMu.Unlock() 425 if d.wrappedMappable == nil { 426 d.wrappedMappable = opts.Mappable 427 d.isMappable.Store(1) 428 } 429 return nil 430 } 431 432 // AddMapping implements memmap.Mappable.AddMapping. 433 func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error { 434 d.mapsMu.Lock() 435 defer d.mapsMu.Unlock() 436 if err := d.wrappedMappable.AddMapping(ctx, ms, ar, offset, writable); err != nil { 437 return err 438 } 439 if !d.isCopiedUp() { 440 d.lowerMappings.AddMapping(ms, ar, offset, writable) 441 } 442 return nil 443 } 444 445 // RemoveMapping implements memmap.Mappable.RemoveMapping. 446 func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) { 447 d.mapsMu.Lock() 448 defer d.mapsMu.Unlock() 449 d.wrappedMappable.RemoveMapping(ctx, ms, ar, offset, writable) 450 if !d.isCopiedUp() { 451 d.lowerMappings.RemoveMapping(ms, ar, offset, writable) 452 } 453 } 454 455 // CopyMapping implements memmap.Mappable.CopyMapping. 456 func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error { 457 d.mapsMu.Lock() 458 defer d.mapsMu.Unlock() 459 if err := d.wrappedMappable.CopyMapping(ctx, ms, srcAR, dstAR, offset, writable); err != nil { 460 return err 461 } 462 if !d.isCopiedUp() { 463 d.lowerMappings.AddMapping(ms, dstAR, offset, writable) 464 } 465 return nil 466 } 467 468 // Translate implements memmap.Mappable.Translate. 469 func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) { 470 d.dataMu.RLock() 471 defer d.dataMu.RUnlock() 472 return d.wrappedMappable.Translate(ctx, required, optional, at) 473 } 474 475 // InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable. 476 func (d *dentry) InvalidateUnsavable(ctx context.Context) error { 477 d.mapsMu.Lock() 478 defer d.mapsMu.Unlock() 479 return d.wrappedMappable.InvalidateUnsavable(ctx) 480 }