// Source: github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/host/inode.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package host

import (
	"golang.org/x/sys/unix"
	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/fd"
	"github.com/SagerNet/gvisor/pkg/safemem"
	"github.com/SagerNet/gvisor/pkg/secio"
	"github.com/SagerNet/gvisor/pkg/sentry/fs"
	"github.com/SagerNet/gvisor/pkg/sentry/fs/fsutil"
	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport"
	"github.com/SagerNet/gvisor/pkg/sync"
	"github.com/SagerNet/gvisor/pkg/syserror"
	"github.com/SagerNet/gvisor/pkg/waiter"
)

// inodeOperations implements fs.InodeOperations for an fs.Inode backed
// by a host file descriptor.
//
// Operations that would create, remove or rename entries are rejected by
// this implementation; attribute and size operations are forwarded either
// to the host FD or to cachingInodeOps, depending on the mount flags and on
// whether the inode can be memory mapped.
//
// +stateify savable
type inodeOperations struct {
	fsutil.InodeNotVirtual           `state:"nosave"`
	fsutil.InodeNoExtendedAttributes `state:"nosave"`

	// fileState implements fsutil.CachedFileObject. It exists
	// to break a circular load dependency between inodeOperations
	// and cachingInodeOps (below).
	fileState *inodeFileState `state:"wait"`

	// cachingInodeOps implements memmap.Mappable. It is the value handed
	// out by Mappable for inodes that can be memory mapped, and it serves
	// attribute operations when the host's metadata caches are not used.
	cachingInodeOps *fsutil.CachingInodeOperations

	// readdirMu protects the file offset on the host FD. This is needed
	// for readdir because getdents must use the kernel offset, so
	// concurrent readdirs must be exclusive.
	//
	// All read/write functions pass the offset directly to the kernel and
	// thus don't need a lock.
	readdirMu sync.Mutex `state:"nosave"`
}

// inodeFileState implements fsutil.CachedFileObject and otherwise fully
// encapsulates state that needs to be manually loaded on restore for
// this file object.
//
// This unfortunate structure exists because fsutil.CachingInodeOperations
// defines afterLoad and therefore cannot be lazily loaded (to break a
// circular load dependency between it and inodeOperations). Even with
// lazy loading, this approach defines the dependencies between objects
// and the expected load behavior more concretely.
//
// +stateify savable
type inodeFileState struct {
	// descriptor is the backing host FD. FD() returns its numeric value.
	descriptor *descriptor `state:"wait"`

	// Event queue for blocking operations. Its address is handed to the
	// descriptor when the inode is created.
	queue waiter.Queue `state:"zerovalue"`

	// sattr is used to restore the inodeOperations. It is derived from the
	// host stat result taken when the inode is created.
	sattr fs.StableAttr `state:"wait"`

	// savedUAttr is only allocated during S/R. It points to the save-time
	// unstable attributes and is used to validate restore-time ones.
	//
	// Note that these unstable attributes are only used to detect cross-S/R
	// external file system metadata changes. They may differ from the
	// cached unstable attributes in cachingInodeOps, as that might differ
	// from the external file system attributes if there had been WriteOut
	// failures. S/R is transparent to Sentry and the latter will continue
	// using its cached values after restore.
	savedUAttr *fs.UnstableAttr
}

// ReadToBlocksAt implements fsutil.CachedFileObject.ReadToBlocksAt.
92 func (i *inodeFileState) ReadToBlocksAt(ctx context.Context, dsts safemem.BlockSeq, offset uint64) (uint64, error) { 93 // TODO(jamieliu): Using safemem.FromIOReader here is wasteful for two 94 // reasons: 95 // 96 // - Using preadv instead of iterated preads saves on host system calls. 97 // 98 // - Host system calls can handle destination memory that would fault in 99 // gr3 (i.e. they can accept safemem.Blocks with NeedSafecopy() == true), 100 // so the buffering performed by FromIOReader is unnecessary. 101 // 102 // This also applies to the write path below. 103 return safemem.FromIOReader{secio.NewOffsetReader(fd.NewReadWriter(i.FD()), int64(offset))}.ReadToBlocks(dsts) 104 } 105 106 // WriteFromBlocksAt implements fsutil.CachedFileObject.WriteFromBlocksAt. 107 func (i *inodeFileState) WriteFromBlocksAt(ctx context.Context, srcs safemem.BlockSeq, offset uint64) (uint64, error) { 108 return safemem.FromIOWriter{secio.NewOffsetWriter(fd.NewReadWriter(i.FD()), int64(offset))}.WriteFromBlocks(srcs) 109 } 110 111 // SetMaskedAttributes implements fsutil.CachedFileObject.SetMaskedAttributes. 112 func (i *inodeFileState) SetMaskedAttributes(ctx context.Context, mask fs.AttrMask, attr fs.UnstableAttr, _ bool) error { 113 if mask.Empty() { 114 return nil 115 } 116 if mask.UID || mask.GID { 117 return linuxerr.EPERM 118 } 119 if mask.Perms { 120 if err := unix.Fchmod(i.FD(), uint32(attr.Perms.LinuxMode())); err != nil { 121 return err 122 } 123 } 124 if mask.Size { 125 if err := unix.Ftruncate(i.FD(), attr.Size); err != nil { 126 return err 127 } 128 } 129 if mask.AccessTime || mask.ModificationTime { 130 ts := fs.TimeSpec{ 131 ATime: attr.AccessTime, 132 ATimeOmit: !mask.AccessTime, 133 MTime: attr.ModificationTime, 134 MTimeOmit: !mask.ModificationTime, 135 } 136 if err := setTimestamps(i.FD(), ts); err != nil { 137 return err 138 } 139 } 140 return nil 141 } 142 143 // Sync implements fsutil.CachedFileObject.Sync. 
144 func (i *inodeFileState) Sync(ctx context.Context) error { 145 return unix.Fsync(i.FD()) 146 } 147 148 // FD implements fsutil.CachedFileObject.FD. 149 func (i *inodeFileState) FD() int { 150 return i.descriptor.value 151 } 152 153 func (i *inodeFileState) unstableAttr(ctx context.Context) (fs.UnstableAttr, error) { 154 var s unix.Stat_t 155 if err := unix.Fstat(i.FD(), &s); err != nil { 156 return fs.UnstableAttr{}, err 157 } 158 return unstableAttr(&s), nil 159 } 160 161 // Allocate implements fsutil.CachedFileObject.Allocate. 162 func (i *inodeFileState) Allocate(_ context.Context, offset, length int64) error { 163 return unix.Fallocate(i.FD(), 0, offset, length) 164 } 165 166 // inodeOperations implements fs.InodeOperations. 167 var _ fs.InodeOperations = (*inodeOperations)(nil) 168 169 // newInode returns a new fs.Inode backed by the host FD. 170 func newInode(ctx context.Context, msrc *fs.MountSource, fd int, saveable bool) (*fs.Inode, error) { 171 // Retrieve metadata. 172 var s unix.Stat_t 173 err := unix.Fstat(fd, &s) 174 if err != nil { 175 return nil, err 176 } 177 178 fileState := &inodeFileState{ 179 sattr: stableAttr(&s), 180 } 181 182 // Initialize the wrapped host file descriptor. 183 fileState.descriptor, err = newDescriptor(fd, saveable, wouldBlock(&s), &fileState.queue) 184 if err != nil { 185 return nil, err 186 } 187 188 // Build the fs.InodeOperations. 189 uattr := unstableAttr(&s) 190 iops := &inodeOperations{ 191 fileState: fileState, 192 cachingInodeOps: fsutil.NewCachingInodeOperations(ctx, fileState, uattr, fsutil.CachingInodeOperationsOptions{ 193 ForcePageCache: msrc.Flags.ForcePageCache, 194 }), 195 } 196 197 // Return the fs.Inode. 198 return fs.NewInode(ctx, iops, msrc, fileState.sattr), nil 199 } 200 201 // Mappable implements fs.InodeOperations.Mappable. 
202 func (i *inodeOperations) Mappable(inode *fs.Inode) memmap.Mappable { 203 if !canMap(inode) { 204 return nil 205 } 206 return i.cachingInodeOps 207 } 208 209 // ReturnsWouldBlock returns true if this host FD can return EWOULDBLOCK for 210 // operations that would block. 211 func (i *inodeOperations) ReturnsWouldBlock() bool { 212 return i.fileState.descriptor.wouldBlock 213 } 214 215 // Release implements fs.InodeOperations.Release. 216 func (i *inodeOperations) Release(context.Context) { 217 i.fileState.descriptor.Release() 218 i.cachingInodeOps.Release() 219 } 220 221 // Lookup implements fs.InodeOperations.Lookup. 222 func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) { 223 return nil, syserror.ENOENT 224 } 225 226 // Create implements fs.InodeOperations.Create. 227 func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.FileFlags, perm fs.FilePermissions) (*fs.File, error) { 228 return nil, linuxerr.EPERM 229 230 } 231 232 // CreateDirectory implements fs.InodeOperations.CreateDirectory. 233 func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error { 234 return linuxerr.EPERM 235 } 236 237 // CreateLink implements fs.InodeOperations.CreateLink. 238 func (i *inodeOperations) CreateLink(ctx context.Context, dir *fs.Inode, oldname string, newname string) error { 239 return linuxerr.EPERM 240 } 241 242 // CreateHardLink implements fs.InodeOperations.CreateHardLink. 243 func (*inodeOperations) CreateHardLink(context.Context, *fs.Inode, *fs.Inode, string) error { 244 return linuxerr.EPERM 245 } 246 247 // CreateFifo implements fs.InodeOperations.CreateFifo. 248 func (*inodeOperations) CreateFifo(context.Context, *fs.Inode, string, fs.FilePermissions) error { 249 return linuxerr.EPERM 250 } 251 252 // Remove implements fs.InodeOperations.Remove. 
253 func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string) error { 254 return linuxerr.EPERM 255 } 256 257 // RemoveDirectory implements fs.InodeOperations.RemoveDirectory. 258 func (i *inodeOperations) RemoveDirectory(ctx context.Context, dir *fs.Inode, name string) error { 259 return linuxerr.EPERM 260 } 261 262 // Rename implements fs.InodeOperations.Rename. 263 func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error { 264 return linuxerr.EPERM 265 } 266 267 // Bind implements fs.InodeOperations.Bind. 268 func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, data transport.BoundEndpoint, perm fs.FilePermissions) (*fs.Dirent, error) { 269 return nil, syserror.EOPNOTSUPP 270 } 271 272 // BoundEndpoint implements fs.InodeOperations.BoundEndpoint. 273 func (i *inodeOperations) BoundEndpoint(inode *fs.Inode, path string) transport.BoundEndpoint { 274 return nil 275 } 276 277 // GetFile implements fs.InodeOperations.GetFile. 278 func (i *inodeOperations) GetFile(ctx context.Context, d *fs.Dirent, flags fs.FileFlags) (*fs.File, error) { 279 if fs.IsSocket(d.Inode.StableAttr) { 280 return nil, linuxerr.ENXIO 281 } 282 283 return newFile(ctx, d, flags, i), nil 284 } 285 286 // canMap returns true if this fs.Inode can be memory mapped. 287 func canMap(inode *fs.Inode) bool { 288 // FIXME(b/38213152): Some obscure character devices can be mapped. 289 return fs.IsFile(inode.StableAttr) 290 } 291 292 // UnstableAttr implements fs.InodeOperations.UnstableAttr. 293 func (i *inodeOperations) UnstableAttr(ctx context.Context, inode *fs.Inode) (fs.UnstableAttr, error) { 294 // When the kernel supports mapping host FDs, we do so to take 295 // advantage of the host page cache. We forego updating fs.Inodes 296 // because the host manages consistency of its own inode structures. 
297 // 298 // For fs.Inodes that can never be mapped we take advantage of 299 // synchronizing metadata updates through host caches. 300 // 301 // So can we use host kernel metadata caches? 302 if !inode.MountSource.Flags.ForcePageCache || !canMap(inode) { 303 // Then just obtain the attributes. 304 return i.fileState.unstableAttr(ctx) 305 } 306 // No, we're maintaining consistency of metadata ourselves. 307 return i.cachingInodeOps.UnstableAttr(ctx, inode) 308 } 309 310 // Check implements fs.InodeOperations.Check. 311 func (i *inodeOperations) Check(ctx context.Context, inode *fs.Inode, p fs.PermMask) bool { 312 return fs.ContextCanAccessFile(ctx, inode, p) 313 } 314 315 // SetOwner implements fs.InodeOperations.SetOwner. 316 func (i *inodeOperations) SetOwner(context.Context, *fs.Inode, fs.FileOwner) error { 317 return linuxerr.EPERM 318 } 319 320 // SetPermissions implements fs.InodeOperations.SetPermissions. 321 func (i *inodeOperations) SetPermissions(ctx context.Context, inode *fs.Inode, f fs.FilePermissions) bool { 322 // Can we use host kernel metadata caches? 323 if !inode.MountSource.Flags.ForcePageCache || !canMap(inode) { 324 // Then just change the timestamps on the FD, the host 325 // will synchronize the metadata update with any host 326 // inode and page cache. 327 return unix.Fchmod(i.fileState.FD(), uint32(f.LinuxMode())) == nil 328 } 329 // Otherwise update our cached metadata. 330 return i.cachingInodeOps.SetPermissions(ctx, inode, f) 331 } 332 333 // SetTimestamps implements fs.InodeOperations.SetTimestamps. 334 func (i *inodeOperations) SetTimestamps(ctx context.Context, inode *fs.Inode, ts fs.TimeSpec) error { 335 // Can we use host kernel metadata caches? 336 if !inode.MountSource.Flags.ForcePageCache || !canMap(inode) { 337 // Then just change the timestamps on the FD, the host 338 // will synchronize the metadata update with any host 339 // inode and page cache. 
340 return setTimestamps(i.fileState.FD(), ts) 341 } 342 // Otherwise update our cached metadata. 343 return i.cachingInodeOps.SetTimestamps(ctx, inode, ts) 344 } 345 346 // Truncate implements fs.InodeOperations.Truncate. 347 func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, size int64) error { 348 // Is the file not memory-mappable? 349 if !canMap(inode) { 350 // Then just change the file size on the FD, the host 351 // will synchronize the metadata update with any host 352 // inode and page cache. 353 return unix.Ftruncate(i.fileState.FD(), size) 354 } 355 // Otherwise we need to go through cachingInodeOps, even if the host page 356 // cache is in use, to invalidate private copies of truncated pages. 357 return i.cachingInodeOps.Truncate(ctx, inode, size) 358 } 359 360 // Allocate implements fs.InodeOperations.Allocate. 361 func (i *inodeOperations) Allocate(ctx context.Context, inode *fs.Inode, offset, length int64) error { 362 // Is the file not memory-mappable? 363 if !canMap(inode) { 364 // Then just send the call to the FD, the host will synchronize the metadata 365 // update with any host inode and page cache. 366 return i.fileState.Allocate(ctx, offset, length) 367 } 368 // Otherwise we need to go through cachingInodeOps, even if the host page 369 // cache is in use, to invalidate private copies of truncated pages. 370 return i.cachingInodeOps.Allocate(ctx, offset, length) 371 } 372 373 // WriteOut implements fs.InodeOperations.WriteOut. 374 func (i *inodeOperations) WriteOut(ctx context.Context, inode *fs.Inode) error { 375 if inode.MountSource.Flags.ReadOnly { 376 return nil 377 } 378 // Have we been using host kernel metadata caches? 379 if !inode.MountSource.Flags.ForcePageCache || !canMap(inode) { 380 // Then the metadata is already up to date on the host. 381 return nil 382 } 383 // Otherwise we need to write out cached pages and attributes 384 // that are dirty. 
385 return i.cachingInodeOps.WriteOut(ctx, inode) 386 } 387 388 // Readlink implements fs.InodeOperations.Readlink. 389 func (i *inodeOperations) Readlink(ctx context.Context, inode *fs.Inode) (string, error) { 390 return readLink(i.fileState.FD()) 391 } 392 393 // Getlink implements fs.InodeOperations.Getlink. 394 func (i *inodeOperations) Getlink(context.Context, *fs.Inode) (*fs.Dirent, error) { 395 if !fs.IsSymlink(i.fileState.sattr) { 396 return nil, linuxerr.ENOLINK 397 } 398 return nil, fs.ErrResolveViaReadlink 399 } 400 401 // StatFS implements fs.InodeOperations.StatFS. 402 func (i *inodeOperations) StatFS(context.Context) (fs.Info, error) { 403 return fs.Info{}, syserror.ENOSYS 404 } 405 406 // AddLink implements fs.InodeOperations.AddLink. 407 func (i *inodeOperations) AddLink() {} 408 409 // DropLink implements fs.InodeOperations.DropLink. 410 func (i *inodeOperations) DropLink() {} 411 412 // NotifyStatusChange implements fs.InodeOperations.NotifyStatusChange. 413 func (i *inodeOperations) NotifyStatusChange(ctx context.Context) {}