// Source: github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/vfs/vfs.go

// Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package vfs

import (
	"io"
	"os"
	"path/filepath"
	"syscall"

	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/errors/oserror"
)

// File is a readable, writable sequence of bytes.
//
// Typically, it will be an *os.File, but test code may choose to substitute
// memory-backed implementations.
//
// Write-oriented operations (Write, Sync) must be called sequentially: At most
// 1 call to Write or Sync may be executed at any given time.
type File interface {
	io.Closer
	io.Reader
	io.ReaderAt
	// Unlike the specification for io.Writer.Write(), the vfs.File.Write()
	// method *is* allowed to modify the slice passed in, whether temporarily
	// or permanently. Callers of Write() need to take this into account.
	io.Writer
	// WriteAt() is only supported for files that were opened with FS.OpenReadWrite.
	io.WriterAt

	// Preallocate optionally preallocates storage for `length` at `offset`
	// within the file. Implementations may choose to do nothing.
	Preallocate(offset, length int64) error
	// Stat returns an os.FileInfo for this file.
	Stat() (os.FileInfo, error)
	// Sync syncs the file to stable storage. See SyncData for a variant that
	// is not required to sync file metadata, and SyncTo for a variant that
	// targets only a prefix of the file.
	Sync() error

	// SyncTo requests that a prefix of the file's data be synced to stable
	// storage. The caller provides a `length`, indicating how many bytes to
	// sync from the beginning of the file. SyncTo is a no-op for directories,
	// and therefore always returns false for them.
	//
	// SyncTo returns a fullSync return value, indicating one of two possible
	// outcomes.
	//
	// If fullSync is false, the first `length` bytes of the file were queued
	// to be synced to stable storage. The syncing of the file prefix may
	// happen asynchronously. No persistence guarantee is provided.
	//
	// If fullSync is true, the entirety of the file's contents were
	// synchronously synced to stable storage, and a persistence guarantee is
	// provided. In this outcome, any modified metadata for the file is not
	// guaranteed to be synced unless that metadata is needed in order to allow
	// a subsequent data retrieval to be correctly handled.
	SyncTo(length int64) (fullSync bool, err error)

	// SyncData requires that all written data be persisted. File metadata is
	// not required to be synced. Unsophisticated implementations may call Sync.
	SyncData() error

	// Prefetch signals the OS (on supported platforms) to fetch the next length
	// bytes in file (as returned by os.File.Fd()) after offset into cache. Any
	// subsequent reads in that range will not issue disk IO.
	Prefetch(offset int64, length int64) error

	// Fd returns the raw file descriptor when a File is backed by an *os.File.
	// It can be used for specific functionality like Prefetch.
	// Returns InvalidFd if not supported.
	Fd() uintptr
}

// InvalidFd is a special value returned by File.Fd() when the file is not
// backed by an OS descriptor.
// Note: the special value is consistent with what os.File implementation
// returns on a nil receiver.
const InvalidFd uintptr = ^(uintptr(0))

// OpenOption provides an interface to do work on file handles in the Open()
// call.
type OpenOption interface {
	// Apply is called on the file handle after it's opened.
	Apply(File)
}

// FS is a namespace for files.
//
// The names are filepath names: they may be / separated or \ separated,
// depending on the underlying operating system.
type FS interface {
	// Create creates the named file for reading and writing. If a file
	// already exists at the provided name, it's removed first ensuring the
	// resulting file descriptor points to a new inode.
	Create(name string) (File, error)

	// Link creates newname as a hard link to the oldname file.
	Link(oldname, newname string) error

	// Open opens the named file for reading. Each option in opts is applied
	// to the file handle after it is opened.
	Open(name string, opts ...OpenOption) (File, error)

	// OpenReadWrite opens the named file for reading and writing. If the file
	// does not exist, it is created.
	OpenReadWrite(name string, opts ...OpenOption) (File, error)

	// OpenDir opens the named directory for syncing.
	OpenDir(name string) (File, error)

	// Remove removes the named file or directory.
	Remove(name string) error

	// RemoveAll removes the named file or directory and any children it
	// contains. It removes everything it can but returns the first error it
	// encounters.
	RemoveAll(name string) error

	// Rename renames a file. It overwrites the file at newname if one exists,
	// the same as os.Rename.
	Rename(oldname, newname string) error

	// ReuseForWrite attempts to reuse the file with oldname by renaming it to newname and opening
	// it for writing without truncation. It is acceptable for the implementation to choose not
	// to reuse oldname, and simply create the file with newname -- in this case the implementation
	// should delete oldname. If the caller calls this function with an oldname that does not exist,
	// the implementation may return an error.
	ReuseForWrite(oldname, newname string) (File, error)

	// MkdirAll creates a directory and all necessary parents. The permission
	// bits perm have the same semantics as in os.MkdirAll. If the directory
	// already exists, MkdirAll does nothing and returns nil.
	MkdirAll(dir string, perm os.FileMode) error

	// Lock locks the given file, creating the file if necessary, and
	// truncating the file if it already exists. The lock is an exclusive lock
	// (a write lock), but locked files should neither be read from nor written
	// to. Such files should have zero size and only exist to co-ordinate
	// ownership across processes.
	//
	// A nil Closer is returned if an error occurred. Otherwise, close that
	// Closer to release the lock.
	//
	// On Linux and OSX, a lock has the same semantics as fcntl(2)'s advisory
	// locks. In particular, closing any other file descriptor for the same
	// file will release the lock prematurely.
	//
	// Attempting to lock a file that is already locked by the current process
	// returns an error and leaves the existing lock untouched.
	//
	// Lock is not yet implemented on other operating systems, and calling it
	// will return an error.
	Lock(name string) (io.Closer, error)

	// List returns a listing of the given directory. The names returned are
	// relative to dir.
	List(dir string) ([]string, error)

	// Stat returns an os.FileInfo describing the named file.
	Stat(name string) (os.FileInfo, error)

	// PathBase returns the last element of path. Trailing path separators are
	// removed before extracting the last element. If the path is empty, PathBase
	// returns ".". If the path consists entirely of separators, PathBase returns a
	// single separator.
	PathBase(path string) string

	// PathJoin joins any number of path elements into a single path, adding a
	// separator if necessary.
	PathJoin(elem ...string) string

	// PathDir returns all but the last element of path, typically the path's directory.
	PathDir(path string) string

	// GetDiskUsage returns disk space statistics for the filesystem where
	// path is any file or directory within that filesystem.
	GetDiskUsage(path string) (DiskUsage, error)
}

// DiskUsage summarizes disk space usage on a filesystem.
181 type DiskUsage struct { 182 // Total disk space available to the current process in bytes. 183 AvailBytes uint64 184 // Total disk space in bytes. 185 TotalBytes uint64 186 // Used disk space in bytes. 187 UsedBytes uint64 188 } 189 190 // Default is a FS implementation backed by the underlying operating system's 191 // file system. 192 var Default FS = defaultFS{} 193 194 type defaultFS struct{} 195 196 // wrapOSFile takes a standard library OS file and returns a vfs.File. f may be 197 // nil, in which case wrapOSFile must not panic. In such cases, it's okay if the 198 // returned vfs.File may panic if used. 199 func wrapOSFile(f *os.File) File { 200 // See the implementations in default_{linux,unix,windows}.go. 201 return wrapOSFileImpl(f) 202 } 203 204 func (defaultFS) Create(name string) (File, error) { 205 const openFlags = os.O_RDWR | os.O_CREATE | os.O_EXCL | syscall.O_CLOEXEC 206 207 osFile, err := os.OpenFile(name, openFlags, 0666) 208 // If the file already exists, remove it and try again. 209 // 210 // NB: We choose to remove the file instead of truncating it, despite the 211 // fact that we can't do so atomically, because it's more resistant to 212 // misuse when using hard links. 213 214 // We must loop in case another goroutine/thread/process is also 215 // attempting to create the a file at the same path. 
216 for oserror.IsExist(err) { 217 if removeErr := os.Remove(name); removeErr != nil && !oserror.IsNotExist(removeErr) { 218 return wrapOSFile(osFile), errors.WithStack(removeErr) 219 } 220 osFile, err = os.OpenFile(name, openFlags, 0666) 221 } 222 return wrapOSFile(osFile), errors.WithStack(err) 223 } 224 225 func (defaultFS) Link(oldname, newname string) error { 226 return errors.WithStack(os.Link(oldname, newname)) 227 } 228 229 func (defaultFS) Open(name string, opts ...OpenOption) (File, error) { 230 osFile, err := os.OpenFile(name, os.O_RDONLY|syscall.O_CLOEXEC, 0) 231 if err != nil { 232 return nil, errors.WithStack(err) 233 } 234 file := wrapOSFile(osFile) 235 for _, opt := range opts { 236 opt.Apply(file) 237 } 238 return file, nil 239 } 240 241 func (defaultFS) OpenReadWrite(name string, opts ...OpenOption) (File, error) { 242 osFile, err := os.OpenFile(name, os.O_RDWR|syscall.O_CLOEXEC|os.O_CREATE, 0666) 243 if err != nil { 244 return nil, errors.WithStack(err) 245 } 246 file := wrapOSFile(osFile) 247 for _, opt := range opts { 248 opt.Apply(file) 249 } 250 return file, nil 251 } 252 253 func (defaultFS) Remove(name string) error { 254 return errors.WithStack(os.Remove(name)) 255 } 256 257 func (defaultFS) RemoveAll(name string) error { 258 return errors.WithStack(os.RemoveAll(name)) 259 } 260 261 func (defaultFS) Rename(oldname, newname string) error { 262 return errors.WithStack(os.Rename(oldname, newname)) 263 } 264 265 func (fs defaultFS) ReuseForWrite(oldname, newname string) (File, error) { 266 if err := fs.Rename(oldname, newname); err != nil { 267 return nil, errors.WithStack(err) 268 } 269 f, err := os.OpenFile(newname, os.O_RDWR|os.O_CREATE|syscall.O_CLOEXEC, 0666) 270 return wrapOSFile(f), errors.WithStack(err) 271 } 272 273 func (defaultFS) MkdirAll(dir string, perm os.FileMode) error { 274 return errors.WithStack(os.MkdirAll(dir, perm)) 275 } 276 277 func (defaultFS) List(dir string) ([]string, error) { 278 f, err := os.Open(dir) 279 if err 
!= nil { 280 return nil, err 281 } 282 defer f.Close() 283 dirnames, err := f.Readdirnames(-1) 284 return dirnames, errors.WithStack(err) 285 } 286 287 func (defaultFS) Stat(name string) (os.FileInfo, error) { 288 finfo, err := os.Stat(name) 289 return finfo, errors.WithStack(err) 290 } 291 292 func (defaultFS) PathBase(path string) string { 293 return filepath.Base(path) 294 } 295 296 func (defaultFS) PathJoin(elem ...string) string { 297 return filepath.Join(elem...) 298 } 299 300 func (defaultFS) PathDir(path string) string { 301 return filepath.Dir(path) 302 } 303 304 type randomReadsOption struct{} 305 306 // RandomReadsOption is an OpenOption that optimizes opened file handle for 307 // random reads, by calling fadvise() with POSIX_FADV_RANDOM on Linux systems 308 // to disable readahead. 309 var RandomReadsOption OpenOption = &randomReadsOption{} 310 311 // Apply implements the OpenOption interface. 312 func (randomReadsOption) Apply(f File) { 313 if fd := f.Fd(); fd != InvalidFd { 314 _ = fadviseRandom(fd) 315 } 316 } 317 318 type sequentialReadsOption struct{} 319 320 // SequentialReadsOption is an OpenOption that optimizes opened file handle for 321 // sequential reads, by calling fadvise() with POSIX_FADV_SEQUENTIAL on Linux 322 // systems to enable readahead. 323 var SequentialReadsOption OpenOption = &sequentialReadsOption{} 324 325 // Apply implements the OpenOption interface. 326 func (sequentialReadsOption) Apply(f File) { 327 if fd := f.Fd(); fd != InvalidFd { 328 _ = fadviseSequential(fd) 329 } 330 } 331 332 // Copy copies the contents of oldname to newname. If newname exists, it will 333 // be overwritten. 334 func Copy(fs FS, oldname, newname string) error { 335 return CopyAcrossFS(fs, oldname, fs, newname) 336 } 337 338 // CopyAcrossFS copies the contents of oldname on srcFS to newname dstFS. If 339 // newname exists, it will be overwritten. 
340 func CopyAcrossFS(srcFS FS, oldname string, dstFS FS, newname string) error { 341 src, err := srcFS.Open(oldname, SequentialReadsOption) 342 if err != nil { 343 return err 344 } 345 defer src.Close() 346 347 dst, err := dstFS.Create(newname) 348 if err != nil { 349 return err 350 } 351 defer dst.Close() 352 353 if _, err := io.Copy(dst, src); err != nil { 354 return err 355 } 356 return dst.Sync() 357 } 358 359 // LimitedCopy copies up to maxBytes from oldname to newname. If newname 360 // exists, it will be overwritten. 361 func LimitedCopy(fs FS, oldname, newname string, maxBytes int64) error { 362 src, err := fs.Open(oldname, SequentialReadsOption) 363 if err != nil { 364 return err 365 } 366 defer src.Close() 367 368 dst, err := fs.Create(newname) 369 if err != nil { 370 return err 371 } 372 defer dst.Close() 373 374 if _, err := io.Copy(dst, &io.LimitedReader{R: src, N: maxBytes}); err != nil { 375 return err 376 } 377 return dst.Sync() 378 } 379 380 // LinkOrCopy creates newname as a hard link to the oldname file. If creating 381 // the hard link fails, LinkOrCopy falls back to copying the file (which may 382 // also fail if oldname doesn't exist or newname already exists). 383 func LinkOrCopy(fs FS, oldname, newname string) error { 384 err := fs.Link(oldname, newname) 385 if err == nil { 386 return nil 387 } 388 // Permit a handful of errors which we know won't be fixed by copying the 389 // file. Note that we don't check for the specifics of the error code as it 390 // isn't easy to do so in a portable manner. On Unix we'd have to check for 391 // LinkError.Err == syscall.EXDEV. On Windows we'd have to check for 392 // ERROR_NOT_SAME_DEVICE, ERROR_INVALID_FUNCTION, and 393 // ERROR_INVALID_PARAMETER. Rather that such OS specific checks, we fall back 394 // to always trying to copy if hard-linking failed. 
395 if oserror.IsExist(err) || oserror.IsNotExist(err) || oserror.IsPermission(err) { 396 return err 397 } 398 return Copy(fs, oldname, newname) 399 } 400 401 // Root returns the base FS implementation, unwrapping all nested FSs that 402 // expose an Unwrap method. 403 func Root(fs FS) FS { 404 type unwrapper interface { 405 Unwrap() FS 406 } 407 408 for { 409 u, ok := fs.(unwrapper) 410 if !ok { 411 break 412 } 413 fs = u.Unwrap() 414 } 415 return fs 416 } 417 418 // ErrUnsupported may be returned a FS when it does not support an operation. 419 var ErrUnsupported = errors.New("pebble: not supported")