github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/overlay/regular_file.go

// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package overlay

import (
	"sync/atomic"

	"github.com/SagerNet/gvisor/pkg/abi/linux"
	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/hostarch"
	"github.com/SagerNet/gvisor/pkg/log"
	"github.com/SagerNet/gvisor/pkg/sentry/arch"
	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
	"github.com/SagerNet/gvisor/pkg/sync"
	"github.com/SagerNet/gvisor/pkg/usermem"
	"github.com/SagerNet/gvisor/pkg/waiter"
)

func (d *dentry) isRegularFile() bool {
	return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFREG
}

func (d *dentry) isSymlink() bool {
	return atomic.LoadUint32(&d.mode)&linux.S_IFMT == linux.S_IFLNK
}

// readlink returns the target of the symlink d, read from d's top layer.
func (d *dentry) readlink(ctx context.Context) (string, error) {
	layerVD := d.topLayer()
	return d.fs.vfsfs.VirtualFilesystem().ReadlinkAt(ctx, d.fs.creds, &vfs.PathOperation{
		Root:  layerVD,
		Start: layerVD,
	})
}

// +stateify savable
type regularFileFD struct {
	fileDescription

	// If copiedUp is false, cachedFD represents
	// fileDescription.dentry().lowerVDs[0]; otherwise, cachedFD represents
	// fileDescription.dentry().upperVD. cachedFlags is the last known value of
	// cachedFD.StatusFlags(). copiedUp, cachedFD, and cachedFlags are
	// protected by mu.
	mu          sync.Mutex `state:"nosave"`
	copiedUp    bool
	cachedFD    *vfs.FileDescription
	cachedFlags uint32

	// If copiedUp is false, lowerWaiters contains all waiter.Entries
	// registered with cachedFD. lowerWaiters is protected by mu.
	lowerWaiters map[*waiter.Entry]waiter.EventMask
}

// getCurrentFD returns the FileDescription for the current top layer, taking
// a reference that the caller is responsible for dropping with DecRef.
func (fd *regularFileFD) getCurrentFD(ctx context.Context) (*vfs.FileDescription, error) {
	fd.mu.Lock()
	defer fd.mu.Unlock()
	wrappedFD, err := fd.currentFDLocked(ctx)
	if err != nil {
		return nil, err
	}
	wrappedFD.IncRef()
	return wrappedFD, nil
}

// currentFDLocked returns the FileDescription for the current top layer
// without taking a reference on it. If the file has been copied up since
// cachedFD was opened, it first switches cachedFD to the upper layer,
// preserving the file offset and migrating any registered waiters.
func (fd *regularFileFD) currentFDLocked(ctx context.Context) (*vfs.FileDescription, error) {
	d := fd.dentry()
	statusFlags := fd.vfsfd.StatusFlags()
	if !fd.copiedUp && d.isCopiedUp() {
		// Switch to the copied-up file.
		upperVD := d.topLayer()
		upperFD, err := fd.filesystem().vfsfs.VirtualFilesystem().OpenAt(ctx, d.fs.creds, &vfs.PathOperation{
			Root:  upperVD,
			Start: upperVD,
		}, &vfs.OpenOptions{
			Flags: statusFlags,
		})
		if err != nil {
			return nil, err
		}
		oldOff, oldOffErr := fd.cachedFD.Seek(ctx, 0, linux.SEEK_CUR)
		if oldOffErr == nil {
			if _, err := upperFD.Seek(ctx, oldOff, linux.SEEK_SET); err != nil {
				upperFD.DecRef(ctx)
				return nil, err
			}
		}
		if len(fd.lowerWaiters) != 0 {
			ready := upperFD.Readiness(^waiter.EventMask(0))
			for e, mask := range fd.lowerWaiters {
				fd.cachedFD.EventUnregister(e)
				upperFD.EventRegister(e, mask)
				if m := ready & mask; m != 0 {
					e.Callback.Callback(e, m)
				}
			}
		}
		fd.cachedFD.DecRef(ctx)
		fd.copiedUp = true
		fd.cachedFD = upperFD
		fd.cachedFlags = statusFlags
		fd.lowerWaiters = nil
	} else if fd.cachedFlags != statusFlags {
		if err := fd.cachedFD.SetStatusFlags(ctx, d.fs.creds, statusFlags); err != nil {
			return nil, err
		}
		fd.cachedFlags = statusFlags
	}
	return fd.cachedFD, nil
}
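// The sketch below is illustrative only and not part of the original file:
// it shows the reference-counting contract of the two accessors above.
// getCurrentFD returns a reference that the caller must DecRef, while
// currentFDLocked requires fd.mu and returns a borrowed reference, exactly
// as the PRead/Read implementations later in this file use them. The name
// exampleReadAt is hypothetical.
func exampleReadAt(ctx context.Context, fd *regularFileFD, dst usermem.IOSequence, off int64) (int64, error) {
	wrappedFD, err := fd.getCurrentFD(ctx)
	if err != nil {
		return 0, err
	}
	// Drop the reference taken by getCurrentFD once the I/O completes.
	defer wrappedFD.DecRef(ctx)
	return wrappedFD.PRead(ctx, dst, off, vfs.ReadOptions{})
}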
// Release implements vfs.FileDescriptionImpl.Release.
func (fd *regularFileFD) Release(ctx context.Context) {
	fd.cachedFD.DecRef(ctx)
	fd.cachedFD = nil
}

// OnClose implements vfs.FileDescriptionImpl.OnClose.
func (fd *regularFileFD) OnClose(ctx context.Context) error {
	// Linux doesn't define ovl_file_operations.flush at all (i.e. its
	// equivalent to OnClose is a no-op). We pass through to
	// fd.cachedFD.OnClose() without upgrading if fd.dentry() has been
	// copied-up, since OnClose is mostly used to define post-close writeback,
	// and if fd.cachedFD hasn't been updated then it can't have been used to
	// mutate fd.dentry() anyway.
	fd.mu.Lock()
	if statusFlags := fd.vfsfd.StatusFlags(); fd.cachedFlags != statusFlags {
		if err := fd.cachedFD.SetStatusFlags(ctx, fd.filesystem().creds, statusFlags); err != nil {
			fd.mu.Unlock()
			return err
		}
		fd.cachedFlags = statusFlags
	}
	wrappedFD := fd.cachedFD
	fd.mu.Unlock()
	return wrappedFD.OnClose(ctx)
}

// Stat implements vfs.FileDescriptionImpl.Stat.
func (fd *regularFileFD) Stat(ctx context.Context, opts vfs.StatOptions) (linux.Statx, error) {
	var stat linux.Statx
	if layerMask := opts.Mask &^ statInternalMask; layerMask != 0 {
		wrappedFD, err := fd.getCurrentFD(ctx)
		if err != nil {
			return linux.Statx{}, err
		}
		stat, err = wrappedFD.Stat(ctx, vfs.StatOptions{
			Mask: layerMask,
			Sync: opts.Sync,
		})
		wrappedFD.DecRef(ctx)
		if err != nil {
			return linux.Statx{}, err
		}
	}
	fd.dentry().statInternalTo(ctx, &opts, &stat)
	return stat, nil
}

// Allocate implements vfs.FileDescriptionImpl.Allocate.
func (fd *regularFileFD) Allocate(ctx context.Context, mode, offset, length uint64) error {
	wrappedFD, err := fd.getCurrentFD(ctx)
	if err != nil {
		return err
	}
	defer wrappedFD.DecRef(ctx)
	return wrappedFD.Allocate(ctx, mode, offset, length)
}

// SetStat implements vfs.FileDescriptionImpl.SetStat.
func (fd *regularFileFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) error {
	d := fd.dentry()
	mode := linux.FileMode(atomic.LoadUint32(&d.mode))
	if err := vfs.CheckSetStat(ctx, auth.CredentialsFromContext(ctx), &opts, mode, auth.KUID(atomic.LoadUint32(&d.uid)), auth.KGID(atomic.LoadUint32(&d.gid))); err != nil {
		return err
	}
	mnt := fd.vfsfd.Mount()
	if err := mnt.CheckBeginWrite(); err != nil {
		return err
	}
	defer mnt.EndWrite()
	if err := d.copyUpLocked(ctx); err != nil {
		return err
	}
	// Changes to d's attributes are serialized by d.copyMu.
	d.copyMu.Lock()
	defer d.copyMu.Unlock()
	wrappedFD, err := fd.currentFDLocked(ctx)
	if err != nil {
		return err
	}
	if err := wrappedFD.SetStat(ctx, opts); err != nil {
		return err
	}

	// Changing owners or truncating may clear one or both of the setuid and
	// setgid bits, so we may have to update opts before setting d.mode.
	inotifyMask := opts.Stat.Mask
	if opts.Stat.Mask&(linux.STATX_UID|linux.STATX_GID|linux.STATX_SIZE) != 0 {
		stat, err := wrappedFD.Stat(ctx, vfs.StatOptions{
			Mask: linux.STATX_MODE,
		})
		if err != nil {
			return err
		}
		opts.Stat.Mode = stat.Mode
		opts.Stat.Mask |= linux.STATX_MODE
		// Don't generate inotify IN_ATTRIB for size-only changes (truncations).
		if opts.Stat.Mask&(linux.STATX_UID|linux.STATX_GID) != 0 {
			inotifyMask |= linux.STATX_MODE
		}
	}

	d.updateAfterSetStatLocked(&opts)
	if ev := vfs.InotifyEventFromStatMask(inotifyMask); ev != 0 {
		d.InotifyWithParent(ctx, ev, 0, vfs.InodeEvent)
	}
	return nil
}
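// An illustrative sketch (hypothetical, not part of the original file):
// truncating a file through SetStat. Per the logic above, a size change may
// clear setuid/setgid on the underlying filesystem, which is why SetStat
// re-reads STATX_MODE after applying the change.
func exampleTruncate(ctx context.Context, fd *regularFileFD, size uint64) error {
	return fd.SetStat(ctx, vfs.SetStatOptions{
		Stat: linux.Statx{
			Mask: linux.STATX_SIZE,
			Size: size,
		},
	})
}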
// StatFS implements vfs.FileDescriptionImpl.StatFS.
func (fd *regularFileFD) StatFS(ctx context.Context) (linux.Statfs, error) {
	return fd.filesystem().statFS(ctx)
}

// Readiness implements waiter.Waitable.Readiness.
func (fd *regularFileFD) Readiness(mask waiter.EventMask) waiter.EventMask {
	ctx := context.Background()
	wrappedFD, err := fd.getCurrentFD(ctx)
	if err != nil {
		// TODO(b/171089913): Just use fd.cachedFD since Readiness can't return
		// an error. This is obviously wrong, but at least consistent with
		// VFS1.
		log.Warningf("overlay.regularFileFD.Readiness: currentFDLocked failed: %v", err)
		fd.mu.Lock()
		wrappedFD = fd.cachedFD
		wrappedFD.IncRef()
		fd.mu.Unlock()
	}
	defer wrappedFD.DecRef(ctx)
	return wrappedFD.Readiness(mask)
}

// EventRegister implements waiter.Waitable.EventRegister.
func (fd *regularFileFD) EventRegister(e *waiter.Entry, mask waiter.EventMask) {
	fd.mu.Lock()
	defer fd.mu.Unlock()
	wrappedFD, err := fd.currentFDLocked(context.Background())
	if err != nil {
		// TODO(b/171089913): Just use fd.cachedFD since EventRegister can't
		// return an error. This is obviously wrong, but at least consistent
		// with VFS1.
		log.Warningf("overlay.regularFileFD.EventRegister: currentFDLocked failed: %v", err)
		wrappedFD = fd.cachedFD
	}
	wrappedFD.EventRegister(e, mask)
	if !fd.copiedUp {
		if fd.lowerWaiters == nil {
			fd.lowerWaiters = make(map[*waiter.Entry]waiter.EventMask)
		}
		fd.lowerWaiters[e] = mask
	}
}

// EventUnregister implements waiter.Waitable.EventUnregister.
func (fd *regularFileFD) EventUnregister(e *waiter.Entry) {
	fd.mu.Lock()
	defer fd.mu.Unlock()
	fd.cachedFD.EventUnregister(e)
	if !fd.copiedUp {
		delete(fd.lowerWaiters, e)
	}
}
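// The helper below is an illustrative sketch, not part of the original file:
// it shows how a caller might block until the overlay FD is readable using
// the waiter plumbing above. waiter.NewChannelEntry and
// waiter.ReadableEvents are assumed to have their usual gVisor signatures at
// this version; exampleWaitReadable itself is hypothetical.
func exampleWaitReadable(fd *regularFileFD) {
	e, ch := waiter.NewChannelEntry(nil)
	fd.EventRegister(&e, waiter.ReadableEvents)
	defer fd.EventUnregister(&e)
	// Check readiness after registering, so a wakeup that fired before the
	// entry was added is not missed.
	if fd.Readiness(waiter.ReadableEvents) == 0 {
		<-ch
	}
}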
// PRead implements vfs.FileDescriptionImpl.PRead.
func (fd *regularFileFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64, opts vfs.ReadOptions) (int64, error) {
	wrappedFD, err := fd.getCurrentFD(ctx)
	if err != nil {
		return 0, err
	}
	defer wrappedFD.DecRef(ctx)
	return wrappedFD.PRead(ctx, dst, offset, opts)
}

// Read implements vfs.FileDescriptionImpl.Read.
func (fd *regularFileFD) Read(ctx context.Context, dst usermem.IOSequence, opts vfs.ReadOptions) (int64, error) {
	// Hold fd.mu during the read to serialize the file offset.
	fd.mu.Lock()
	defer fd.mu.Unlock()
	wrappedFD, err := fd.currentFDLocked(ctx)
	if err != nil {
		return 0, err
	}
	return wrappedFD.Read(ctx, dst, opts)
}

// PWrite implements vfs.FileDescriptionImpl.PWrite.
func (fd *regularFileFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int64, opts vfs.WriteOptions) (int64, error) {
	wrappedFD, err := fd.getCurrentFD(ctx)
	if err != nil {
		return 0, err
	}
	defer wrappedFD.DecRef(ctx)
	n, err := wrappedFD.PWrite(ctx, src, offset, opts)
	if err != nil {
		return n, err
	}
	return fd.updateSetUserGroupIDs(ctx, wrappedFD, n)
}

// Write implements vfs.FileDescriptionImpl.Write.
func (fd *regularFileFD) Write(ctx context.Context, src usermem.IOSequence, opts vfs.WriteOptions) (int64, error) {
	// Hold fd.mu during the write to serialize the file offset.
	fd.mu.Lock()
	defer fd.mu.Unlock()
	wrappedFD, err := fd.currentFDLocked(ctx)
	if err != nil {
		return 0, err
	}
	n, err := wrappedFD.Write(ctx, src, opts)
	if err != nil {
		return n, err
	}
	return fd.updateSetUserGroupIDs(ctx, wrappedFD, n)
}

func (fd *regularFileFD) updateSetUserGroupIDs(ctx context.Context, wrappedFD *vfs.FileDescription, written int64) (int64, error) {
	// Writing can clear the setuid and/or setgid bits. We only have to
	// check this if something was written and one of those bits was set.
	dentry := fd.dentry()
	if written == 0 || atomic.LoadUint32(&dentry.mode)&(linux.S_ISUID|linux.S_ISGID) == 0 {
		return written, nil
	}
	stat, err := wrappedFD.Stat(ctx, vfs.StatOptions{Mask: linux.STATX_MODE})
	if err != nil {
		return written, err
	}
	dentry.copyMu.Lock()
	defer dentry.copyMu.Unlock()
	atomic.StoreUint32(&dentry.mode, uint32(stat.Mode))
	return written, nil
}
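// Another illustrative sketch (hypothetical, not part of the original file):
// wrapping an in-memory buffer in a usermem.IOSequence and writing it at a
// fixed offset through the overlay FD. usermem.BytesIOSequence is the
// standard gVisor helper for this; exampleWriteAt is not a real helper.
func exampleWriteAt(ctx context.Context, fd *regularFileFD, buf []byte, off int64) (int64, error) {
	// PWrite handles copy-up transparently and refreshes the cached mode
	// if the write cleared a setuid/setgid bit (see updateSetUserGroupIDs).
	return fd.PWrite(ctx, usermem.BytesIOSequence(buf), off, vfs.WriteOptions{})
}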
// Seek implements vfs.FileDescriptionImpl.Seek.
func (fd *regularFileFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
	// Hold fd.mu during the seek to serialize the file offset.
	fd.mu.Lock()
	defer fd.mu.Unlock()
	wrappedFD, err := fd.currentFDLocked(ctx)
	if err != nil {
		return 0, err
	}
	return wrappedFD.Seek(ctx, offset, whence)
}

// Sync implements vfs.FileDescriptionImpl.Sync.
func (fd *regularFileFD) Sync(ctx context.Context) error {
	fd.mu.Lock()
	if !fd.dentry().isCopiedUp() {
		fd.mu.Unlock()
		return nil
	}
	wrappedFD, err := fd.currentFDLocked(ctx)
	if err != nil {
		fd.mu.Unlock()
		return err
	}
	wrappedFD.IncRef()
	defer wrappedFD.DecRef(ctx)
	fd.mu.Unlock()
	return wrappedFD.Sync(ctx)
}

// Ioctl implements vfs.FileDescriptionImpl.Ioctl.
func (fd *regularFileFD) Ioctl(ctx context.Context, uio usermem.IO, args arch.SyscallArguments) (uintptr, error) {
	wrappedFD, err := fd.getCurrentFD(ctx)
	if err != nil {
		return 0, err
	}
	defer wrappedFD.DecRef(ctx)
	return wrappedFD.Ioctl(ctx, uio, args)
}

// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
func (fd *regularFileFD) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
	if err := fd.ensureMappable(ctx, opts); err != nil {
		return err
	}
	return vfs.GenericConfigureMMap(&fd.vfsfd, fd.dentry(), opts)
}

// ensureMappable ensures that fd.dentry().wrappedMappable is not nil.
func (fd *regularFileFD) ensureMappable(ctx context.Context, opts *memmap.MMapOpts) error {
	d := fd.dentry()

	// Fast path if we already have a Mappable for the current top layer.
	if atomic.LoadUint32(&d.isMappable) != 0 {
		return nil
	}

	// Only permit mmap of regular files, since other file types may have
	// unpredictable behavior when mmapped (e.g. /dev/zero).
	if atomic.LoadUint32(&d.mode)&linux.S_IFMT != linux.S_IFREG {
		return linuxerr.ENODEV
	}

	// Get a Mappable for the current top layer.
	fd.mu.Lock()
	defer fd.mu.Unlock()
	d.copyMu.RLock()
	defer d.copyMu.RUnlock()
	if atomic.LoadUint32(&d.isMappable) != 0 {
		return nil
	}
	wrappedFD, err := fd.currentFDLocked(ctx)
	if err != nil {
		return err
	}
	if err := wrappedFD.ConfigureMMap(ctx, opts); err != nil {
		return err
	}
	if opts.MappingIdentity != nil {
		opts.MappingIdentity.DecRef(ctx)
		opts.MappingIdentity = nil
	}
	// Use this Mappable for all mappings of this layer (unless we raced with
	// another call to ensureMappable).
	d.mapsMu.Lock()
	defer d.mapsMu.Unlock()
	d.dataMu.Lock()
	defer d.dataMu.Unlock()
	if d.wrappedMappable == nil {
		d.wrappedMappable = opts.Mappable
		atomic.StoreUint32(&d.isMappable, 1)
	}
	return nil
}
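// An illustrative sketch (hypothetical, not part of the original file) of
// driving ConfigureMMap directly to obtain the dentry's Mappable. The
// memmap.MMapOpts field values are assumptions for a one-page read-only
// mapping; exampleGetMappable is not a real gVisor helper.
func exampleGetMappable(ctx context.Context, fd *regularFileFD) (memmap.Mappable, error) {
	opts := memmap.MMapOpts{
		Length:   hostarch.PageSize,
		Perms:    hostarch.Read,
		MaxPerms: hostarch.Read,
	}
	if err := fd.ConfigureMMap(ctx, &opts); err != nil {
		return nil, err
	}
	// After a successful ConfigureMMap, opts.Mappable is the overlay dentry,
	// which delegates to the top layer's Mappable (see ensureMappable above).
	return opts.Mappable, nil
}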
// AddMapping implements memmap.Mappable.AddMapping.
func (d *dentry) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
	d.mapsMu.Lock()
	defer d.mapsMu.Unlock()
	if err := d.wrappedMappable.AddMapping(ctx, ms, ar, offset, writable); err != nil {
		return err
	}
	if !d.isCopiedUp() {
		d.lowerMappings.AddMapping(ms, ar, offset, writable)
	}
	return nil
}

// RemoveMapping implements memmap.Mappable.RemoveMapping.
func (d *dentry) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
	d.mapsMu.Lock()
	defer d.mapsMu.Unlock()
	d.wrappedMappable.RemoveMapping(ctx, ms, ar, offset, writable)
	if !d.isCopiedUp() {
		d.lowerMappings.RemoveMapping(ms, ar, offset, writable)
	}
}

// CopyMapping implements memmap.Mappable.CopyMapping.
func (d *dentry) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
	d.mapsMu.Lock()
	defer d.mapsMu.Unlock()
	if err := d.wrappedMappable.CopyMapping(ctx, ms, srcAR, dstAR, offset, writable); err != nil {
		return err
	}
	if !d.isCopiedUp() {
		d.lowerMappings.AddMapping(ms, dstAR, offset, writable)
	}
	return nil
}

// Translate implements memmap.Mappable.Translate.
func (d *dentry) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
	d.dataMu.RLock()
	defer d.dataMu.RUnlock()
	return d.wrappedMappable.Translate(ctx, required, optional, at)
}

// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
func (d *dentry) InvalidateUnsavable(ctx context.Context) error {
	d.mapsMu.Lock()
	defer d.mapsMu.Unlock()
	return d.wrappedMappable.InvalidateUnsavable(ctx)
}
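// Compile-time interface check, added for illustration (the original file
// relies on this relationship implicitly): with the five methods above, the
// overlay dentry satisfies memmap.Mappable by delegating each call to the
// top layer's wrappedMappable.
var _ memmap.Mappable = (*dentry)(nil)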