github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/gofer/file.go

// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package gofer

import (
	"fmt"
	"time"

	"golang.org/x/sys/unix"
	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/log"
	"github.com/SagerNet/gvisor/pkg/metric"
	"github.com/SagerNet/gvisor/pkg/p9"
	"github.com/SagerNet/gvisor/pkg/sentry/device"
	"github.com/SagerNet/gvisor/pkg/sentry/fs"
	"github.com/SagerNet/gvisor/pkg/sentry/fs/fsutil"
	"github.com/SagerNet/gvisor/pkg/sentry/fsmetric"
	"github.com/SagerNet/gvisor/pkg/sentry/memmap"
	"github.com/SagerNet/gvisor/pkg/syserror"
	"github.com/SagerNet/gvisor/pkg/usermem"
	"github.com/SagerNet/gvisor/pkg/waiter"
)

// fileOperations implements fs.FileOperations for a remote file system.
//
// +stateify savable
type fileOperations struct {
	fsutil.FileNoIoctl  `state:"nosave"`
	fsutil.FileNoSplice `state:"nosplice"`
	waiter.AlwaysReady  `state:"nosave"`

	// inodeOperations is the inodeOperations backing the file. It is protected
	// by a reference held by File.Dirent.Inode which is stable until
	// FileOperations.Release is called.
	inodeOperations *inodeOperations `state:"wait"`

	// dirCursor is the directory cursor.
	dirCursor string

	// handles are the opened remote file system handles, which may
	// be shared with other files.
	handles *handles `state:"nosave"`

	// flags are the flags used to open handles.
	flags fs.FileFlags `state:"wait"`
}

// fileOperations implements fs.FileOperations.
var _ fs.FileOperations = (*fileOperations)(nil)

// NewFile returns a file. NewFile is not appropriate with host pipes and sockets.
//
// The `name` argument is only used to log a warning if we are returning a
// writeable+executable file. (A metric counter is incremented in this case as
// well.) Note that we cannot call d.BaseName() directly in this function,
// because that would lead to a lock order violation, since this is called in
// d.Create which holds d.mu, while d.BaseName() takes d.parent.mu, and the two
// locks must be taken in the opposite order.
func NewFile(ctx context.Context, dirent *fs.Dirent, name string, flags fs.FileFlags, i *inodeOperations, handles *handles) *fs.File {
	// Remote file systems enforce readability/writability at an offset,
	// see fs/9p/vfs_inode.c:v9fs_vfs_atomic_open -> fs/open.c:finish_open.
	flags.Pread = true
	flags.Pwrite = true

	if fs.IsFile(dirent.Inode.StableAttr) {
		// If cache policy is "remote revalidating", then we must
		// ensure that we have a host FD. Otherwise, the
		// sentry-internal page cache will be used, and we can end up
		// in an inconsistent state if the remote file changes.
		cp := dirent.Inode.InodeOperations.(*inodeOperations).session().cachePolicy
		if cp == cacheRemoteRevalidating && handles.Host == nil {
			panic(fmt.Sprintf("remote-revalidating cache policy requires gofer to donate host FD, but file %q did not have host FD", name))
		}
	}

	f := &fileOperations{
		inodeOperations: i,
		handles:         handles,
		flags:           flags,
	}
	if flags.Write {
		if err := dirent.Inode.CheckPermission(ctx, fs.PermMask{Execute: true}); err == nil {
			metric.SuspiciousOperationsMetric.Increment("opened_write_execute_file")
			log.Warningf("Opened a writable executable: %q", name)
		}
	}
	if handles.Host != nil {
		fsmetric.GoferOpensHost.Increment()
	} else {
		fsmetric.GoferOpens9P.Increment()
	}
	return fs.NewFile(ctx, dirent, flags, f)
}

// Release implements fs.FileOperations.Release.
func (f *fileOperations) Release(context.Context) {
	f.handles.DecRef()
}

// Readdir implements fs.FileOperations.Readdir.
func (f *fileOperations) Readdir(ctx context.Context, file *fs.File, serializer fs.DentrySerializer) (int64, error) {
	root := fs.RootFromContext(ctx)
	if root != nil {
		defer root.DecRef(ctx)
	}

	dirCtx := &fs.DirCtx{
		Serializer: serializer,
		DirCursor:  &f.dirCursor,
	}
	n, err := fs.DirentReaddir(ctx, file.Dirent, f, root, dirCtx, file.Offset())
	if f.inodeOperations.session().cachePolicy.cacheUAttrs(file.Dirent.Inode) {
		f.inodeOperations.cachingInodeOps.TouchAccessTime(ctx, file.Dirent.Inode)
	}
	return n, err
}

// IterateDir implements fs.DirIterator.IterateDir.
func (f *fileOperations) IterateDir(ctx context.Context, d *fs.Dirent, dirCtx *fs.DirCtx, offset int) (int, error) {
	f.inodeOperations.readdirMu.Lock()
	defer f.inodeOperations.readdirMu.Unlock()

	// Fetch directory entries if needed.
	if !f.inodeOperations.session().cachePolicy.cacheReaddir() || f.inodeOperations.readdirCache == nil {
		entries, err := f.readdirAll(ctx)
		if err != nil {
			return offset, err
		}

		// Cache the readdir result.
		f.inodeOperations.readdirCache = fs.NewSortedDentryMap(entries)
	}

	// Serialize the entries.
	n, err := fs.GenericReaddir(dirCtx, f.inodeOperations.readdirCache)
	return offset + n, err
}

// readdirAll fetches fs.DentAttrs for all entries in the directory backed by
// f, using the attributes returned by the gofer.
func (f *fileOperations) readdirAll(ctx context.Context) (map[string]fs.DentAttr, error) {
	entries := make(map[string]fs.DentAttr)
	var readOffset uint64
	for {
		// We choose some arbitrary high number of directory entries (64k) and call
		// Readdir until we've exhausted them all.
		dirents, err := f.handles.File.readdir(ctx, readOffset, 64*1024)
		if err != nil {
			return nil, err
		}
		if len(dirents) == 0 {
			// We're done, we reached EOF.
			break
		}

		// The last dirent contains the offset into the next set of dirents. The gofer
		// returns the offset as an index into directories, not as a byte offset, because
		// converting a byte offset to an index into directory entries is a huge pain.
		// But everything is fine if we're consistent.
		readOffset = dirents[len(dirents)-1].Offset

		for _, dirent := range dirents {
			if dirent.Name == "." || dirent.Name == ".." {
				// These must not be included in Readdir results.
				continue
			}

			// Find a best approximation of the type.
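			// Only directories and symlinks are distinguished;
			// every other 9P dirent type (sockets, pipes, devices,
			// etc.) is reported as a regular file.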
			var nt fs.InodeType
			switch dirent.Type {
			case p9.TypeDir:
				nt = fs.Directory
			case p9.TypeSymlink:
				nt = fs.Symlink
			default:
				nt = fs.RegularFile
			}

			// Install the DentAttr.
			entries[dirent.Name] = fs.DentAttr{
				Type: nt,
				// Construct the key to find the virtual inode.
				// Directory entries reside on the same Device
				// and SecondaryDevice as their parent.
				InodeID: goferDevice.Map(device.MultiDeviceKey{
					Device:          f.inodeOperations.fileState.key.Device,
					SecondaryDevice: f.inodeOperations.fileState.key.SecondaryDevice,
					Inode:           dirent.QID.Path,
				}),
			}
		}
	}

	return entries, nil
}

// maybeSync will call FSync on the file if the file flags require it.
func (f *fileOperations) maybeSync(ctx context.Context, file *fs.File, offset, n int64) error {
	flags := file.Flags()
	var syncType fs.SyncType
	switch {
	case flags.Direct || flags.Sync:
		syncType = fs.SyncAll
	case flags.DSync:
		syncType = fs.SyncData
	default:
		// No need to sync.
		return nil
	}

	return f.Fsync(ctx, file, offset, offset+n, syncType)
}

// Write implements fs.FileOperations.Write.
func (f *fileOperations) Write(ctx context.Context, file *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
	if fs.IsDir(file.Dirent.Inode.StableAttr) {
		// Not all remote file systems enforce this, so this client does.
		return 0, syserror.EISDIR
	}

	var (
		n   int64
		err error
	)
	// The write is handled in different ways depending on the cache policy
	// and availability of a host-mappable FD.
	if f.inodeOperations.session().cachePolicy.useCachingInodeOps(file.Dirent.Inode) {
		n, err = f.inodeOperations.cachingInodeOps.Write(ctx, src, offset)
	} else {
		uattr, e := f.UnstableAttr(ctx, file)
		if e != nil {
			return 0, e
		}
		if f.inodeOperations.fileState.hostMappable != nil {
			n, err = f.inodeOperations.fileState.hostMappable.Write(ctx, src, offset, uattr)
		} else {
			n, err = src.CopyInTo(ctx, f.handles.readWriterAt(ctx, offset))
			if n > 0 && uattr.Perms.HasSetUIDOrGID() {
				uattr.Perms.DropSetUIDAndMaybeGID()
				f.inodeOperations.SetPermissions(ctx, file.Dirent.Inode, uattr.Perms)
			}
		}
	}

	if n == 0 {
		// Nothing written. We are done.
		return 0, err
	}

	// Write the dirty pages and attributes if the cache policy tells us to.
	if f.inodeOperations.session().cachePolicy.writeThrough(file.Dirent.Inode) {
		if werr := f.inodeOperations.cachingInodeOps.WriteDirtyPagesAndAttrs(ctx, file.Dirent.Inode); werr != nil {
			// Report no bytes written since the write failed.
			return 0, werr
		}
	}

	// We may need to sync the written bytes.
	if syncErr := f.maybeSync(ctx, file, offset, n); syncErr != nil {
		// Sync failed. Report 0 bytes written, since none of them are
		// guaranteed to have been synced.
		return 0, syncErr
	}

	return n, err
}

// incrementReadCounters increments the read counters for the read starting at the given time. We
// use this function rather than using a defer in Read() to avoid the performance hit of defer.
func (f *fileOperations) incrementReadCounters(start time.Time) {
	if f.handles.Host != nil {
		fsmetric.GoferReadsHost.Increment()
		fsmetric.FinishReadWait(fsmetric.GoferReadWaitHost, start)
	} else {
		fsmetric.GoferReads9P.Increment()
		fsmetric.FinishReadWait(fsmetric.GoferReadWait9P, start)
	}
}

// Read implements fs.FileOperations.Read.
func (f *fileOperations) Read(ctx context.Context, file *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
	start := fsmetric.StartReadWait()
	if fs.IsDir(file.Dirent.Inode.StableAttr) {
		// Not all remote file systems enforce this, so this client does.
		f.incrementReadCounters(start)
		return 0, syserror.EISDIR
	}

	if f.inodeOperations.session().cachePolicy.useCachingInodeOps(file.Dirent.Inode) {
		n, err := f.inodeOperations.cachingInodeOps.Read(ctx, file, dst, offset)
		f.incrementReadCounters(start)
		return n, err
	}
	n, err := dst.CopyOutFrom(ctx, f.handles.readWriterAt(ctx, offset))
	f.incrementReadCounters(start)
	return n, err
}

// Fsync implements fs.FileOperations.Fsync.
func (f *fileOperations) Fsync(ctx context.Context, file *fs.File, start, end int64, syncType fs.SyncType) error {
	switch syncType {
	case fs.SyncAll, fs.SyncData:
		if err := file.Dirent.Inode.WriteOut(ctx); err != nil {
			return err
		}
		fallthrough
	case fs.SyncBackingStorage:
		// Sync remote caches.
		if f.handles.Host != nil {
			// Sync the host fd directly.
			return unix.Fsync(f.handles.Host.FD())
		}
		// Otherwise sync on the p9.File handle.
		return f.handles.File.fsync(ctx)
	}
	panic("invalid sync type")
}

// Flush implements fs.FileOperations.Flush.
func (f *fileOperations) Flush(ctx context.Context, file *fs.File) error {
	// If this file is not opened writable then there is nothing to flush.
	// We do this because some p9 server implementations of Flush are
	// over-zealous.
	//
	// FIXME(edahlgren): weaken these implementations and remove this check.
	if !file.Flags().Write {
		return nil
	}
	// Execute the flush.
	return f.handles.File.flush(ctx)
}

// ConfigureMMap implements fs.FileOperations.ConfigureMMap.
func (f *fileOperations) ConfigureMMap(ctx context.Context, file *fs.File, opts *memmap.MMapOpts) error {
	return f.inodeOperations.configureMMap(file, opts)
}

// UnstableAttr implements fs.FileOperations.UnstableAttr.
func (f *fileOperations) UnstableAttr(ctx context.Context, file *fs.File) (fs.UnstableAttr, error) {
	s := f.inodeOperations.session()
	if s.cachePolicy.cacheUAttrs(file.Dirent.Inode) {
		return f.inodeOperations.cachingInodeOps.UnstableAttr(ctx, file.Dirent.Inode)
	}
	// Use f.handles.File, which represents 9P fids that have been opened,
	// instead of inodeFileState.file, which represents 9P fids that have not.
	// This may be significantly more efficient in some implementations.
	_, valid, pattr, err := getattr(ctx, f.handles.File)
	if err != nil {
		return fs.UnstableAttr{}, err
	}
	return unstable(ctx, valid, pattr, s.mounter, s.client), nil
}

// Seek implements fs.FileOperations.Seek.
func (f *fileOperations) Seek(ctx context.Context, file *fs.File, whence fs.SeekWhence, offset int64) (int64, error) {
	return fsutil.SeekWithDirCursor(ctx, file, whence, offset, &f.dirCursor)
}