github.com/grailbio/base@v0.0.11/file/gfilefs/gfile.go (about) 1 // Copyright 2022 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache-2.0 3 // license that can be found in the LICENSE file. 4 5 package gfilefs 6 7 import ( 8 "bytes" 9 "context" 10 "fmt" 11 "io" 12 "io/ioutil" 13 "os" 14 "time" 15 16 "github.com/grailbio/base/errors" 17 "github.com/grailbio/base/file" 18 "github.com/grailbio/base/file/fsnodefuse" 19 "github.com/grailbio/base/file/internal/readmatcher" 20 "github.com/grailbio/base/ioctx" 21 "github.com/grailbio/base/ioctx/fsctx" 22 "github.com/grailbio/base/sync/ctxsync" 23 "github.com/hanwen/go-fuse/v2/fuse" 24 ) 25 26 // gfile implements fsctx.File and fsnodefuse.Writable to represent open 27 // gfilefs files. 28 type gfile struct { 29 // n is the node for which this instance is an open file. 30 n *fileNode 31 // flag holds the flag bits specified when this file was opened. 32 flag int 33 34 // readerAt is an optional ReaderAt implementation. It may only be set upon 35 // construction, and must not be modified later. Thus, it can be read by 36 // multiple goroutines without holding the lock, without a data race. 37 // When non-nil, it serves ReadAt requests concurrently, without ops. 38 // Otherwise, gfile.ReadAt uses ops.ReadAt. 39 readerAt ioctx.ReaderAt 40 41 // mu provides mutually exclusive access to the fields below. 42 mu ctxsync.Mutex 43 // requestedSize is the size requested by Truncate. Note that we only 44 // really support truncation to 0 well, as it is mainly used by go-fuse for 45 // truncation when handling O_TRUNC. 46 requestedSize int64 47 // flushed is true if there are no writes that need to be flushed. If 48 // flushed == true, Flush is a no-op. 49 flushed bool 50 // anyWritten tracks whether we have written any bytes to this file. We 51 // use this to decide whether we can use direct writing. 52 anyWritten bool 53 // ops handles underlying I/O operations. See ioOps. ops may be lazily 54 // populated, and it may be reassigned over the lifetime of the file, e.g. 55 // after truncation, we may switch to an ops that no longer uses a 56 // temporary file. 57 ops ioOps 58 } 59 60 var ( 61 _ fsctx.File = (*gfile)(nil) 62 _ ioctx.ReaderAt = (*gfile)(nil) 63 _ fsnodefuse.Writable = (*gfile)(nil) 64 ) 65 66 // OpenFile opens the file at n and returns a *gfile representing it for file 67 // operations. 68 func OpenFile(ctx context.Context, n *fileNode, flag int) (*gfile, error) { 69 gf := &gfile{ 70 n: n, 71 flag: flag, 72 requestedSize: -1, 73 // Creation and truncation require flushing. 74 flushed: (flag&os.O_CREATE) == 0 && (flag&os.O_TRUNC) == 0, 75 } 76 if (flag & int(fuse.O_ANYWRITE)) == 0 { 77 // Read-only files are initialized eagerly, as it is cheap, and we can 78 // immediately return any errors. Writable files are initialized 79 // lazily; see lockedInitOps. 80 f, err := file.Open(ctx, n.path) 81 if err != nil { 82 return nil, err 83 } 84 dr := directRead{ 85 f: f, 86 matcher: readmatcher.New(f.OffsetReader), 87 r: f.Reader(context.Background()), // TODO: Tie to gf lifetime? 88 } 89 gf.ops = &dr 90 gf.readerAt = dr.matcher 91 return gf, nil 92 } 93 return gf, nil 94 } 95 96 // Stat implements fsctx.File. 97 func (gf *gfile) Stat(ctx context.Context) (os.FileInfo, error) { 98 if err := gf.mu.Lock(ctx); err != nil { 99 return nil, err 100 } 101 defer gf.mu.Unlock() 102 if err := gf.lockedInitOps(ctx); err != nil { 103 return nil, err 104 } 105 info, err := gf.ops.Stat(ctx) 106 if err != nil { 107 if errors.Recover(err).Kind == errors.NotSupported { 108 return gf.n.Info(), nil 109 } 110 return nil, errors.E(err, "getting stat info from underlying I/O") 111 } 112 newInfo := gf.n.fsnodeInfo(). 113 WithModTime(info.ModTime()). 114 WithSize(info.Size()) 115 gf.n.setFsnodeInfo(newInfo) 116 return newInfo, nil 117 } 118 119 // Read implements fsctx.File. 120 func (gf *gfile) Read(ctx context.Context, p []byte) (int, error) { 121 if err := gf.mu.Lock(ctx); err != nil { 122 return 0, err 123 } 124 defer gf.mu.Unlock() 125 if err := gf.lockedInitOps(ctx); err != nil { 126 return 0, err 127 } 128 return gf.ops.Read(ctx, p) 129 } 130 131 // ReadAt implements ioctx.ReaderAt. 132 func (gf *gfile) ReadAt(ctx context.Context, p []byte, off int64) (int, error) { 133 if gf.readerAt != nil { 134 return gf.readerAt.ReadAt(ctx, p, off) 135 } 136 if err := gf.mu.Lock(ctx); err != nil { 137 return 0, err 138 } 139 defer gf.mu.Unlock() 140 if err := gf.lockedInitOps(ctx); err != nil { 141 return 0, err 142 } 143 return gf.ops.ReadAt(ctx, p, off) 144 } 145 146 // WriteAt implements fsnodefuse.Writable. 147 func (gf *gfile) WriteAt(ctx context.Context, p []byte, off int64) (int, error) { 148 if err := gf.mu.Lock(ctx); err != nil { 149 return 0, err 150 } 151 defer gf.mu.Unlock() 152 if err := gf.lockedInitOps(ctx); err != nil { 153 return 0, err 154 } 155 n, err := gf.ops.WriteAt(ctx, p, off) 156 if err != nil { 157 return n, err 158 } 159 gf.anyWritten = true 160 gf.flushed = false 161 return n, err 162 } 163 164 // Truncate implements fsnodefuse.Writable. 165 func (gf *gfile) Truncate(ctx context.Context, size int64) error { 166 if err := gf.mu.Lock(ctx); err != nil { 167 return err 168 } 169 defer gf.mu.Unlock() 170 gf.flushed = false 171 if gf.ops == nil { 172 gf.requestedSize = 0 173 return nil 174 } 175 return gf.ops.Truncate(ctx, size) 176 } 177 178 // Flush implements fsnodefuse.Writable. 179 func (gf *gfile) Flush(ctx context.Context) error { 180 if err := gf.mu.Lock(ctx); err != nil { 181 return err 182 } 183 defer gf.mu.Unlock() 184 return gf.lockedFlush() 185 } 186 187 // Fsync implements fsnodefuse.Writable. 188 func (gf *gfile) Fsync(ctx context.Context) error { 189 // We treat Fsync as Flush, mostly because leaving it unimplemented 190 // (ENOSYS) breaks too many applications. 191 return gf.Flush(ctx) 192 } 193 194 // Close implements fsctx.File. 195 func (gf *gfile) Close(ctx context.Context) error { 196 if err := gf.mu.Lock(ctx); err != nil { 197 return err 198 } 199 defer gf.mu.Unlock() 200 if gf.ops == nil { 201 return nil 202 } 203 return gf.ops.Close(ctx) 204 } 205 206 // lockedInitOps initializes the ops that handle the underlying I/O operations 207 // of gf. This is done lazily in some cases, as it may be expensive, e.g. 208 // downloading a remotely stored file locally. Initialization may also depend 209 // on other operations, e.g. if the first manipulation is truncation, then we 210 // won't download existing data. gf.ops is non-nil iff lockedInitOps returns a 211 // nil error. The caller must have gf.mu locked. 212 func (gf *gfile) lockedInitOps(ctx context.Context) (err error) { 213 if gf.ops != nil { 214 return nil 215 } 216 // base/file does not expose an API to open a file for writing without 217 // creating it, so writing implies creation. 218 const tmpPattern = "gfilefs-" 219 var ( 220 rdwr = (gf.flag & os.O_RDWR) == os.O_RDWR 221 // Treat O_EXCL as O_TRUNC, as the file package does not support 222 // O_EXCL. 223 trunc = !gf.anyWritten && 224 (gf.requestedSize == 0 || 225 (gf.flag&os.O_TRUNC) == os.O_TRUNC || 226 (gf.flag&os.O_EXCL) == os.O_EXCL) 227 ) 228 switch { 229 case trunc && rdwr: 230 tmp, err := ioutil.TempFile("", tmpPattern) 231 if err != nil { 232 return errors.E(err, "making temp file") 233 } 234 gf.ops = &tmpIO{n: gf.n, f: tmp} 235 return nil 236 case trunc: 237 f, err := file.Create(ctx, gf.n.path) 238 if err != nil { 239 return errors.E(err, fmt.Sprintf("creating file at %q", gf.n.path)) 240 } 241 // This is a workaround for the fact that directWrite ops do not 242 // support Stat (as write-only s3files do not support Stat). Callers, 243 // e.g. fsnodefuse, may fall back to use the node's information, so we 244 // zero that to keep a sensible view. 245 gf.n.setFsnodeInfo(gf.n.fsnodeInfo().WithSize(0)) 246 gf.ops = &directWrite{ 247 n: gf.n, 248 f: f, 249 w: f.Writer(context.Background()), // TODO: Tie to gf lifetime? 250 off: 0, 251 } 252 return nil 253 default: 254 // existing reads out existing file contents. Contents may be empty if 255 // no file exists yet. 256 var existing io.Reader 257 f, err := file.Open(ctx, gf.n.path) 258 if err == nil { 259 existing = f.Reader(ctx) 260 } else { 261 if errors.Is(errors.NotExist, err) { 262 if !rdwr { 263 // Write-only and no existing file, so we can use direct 264 // I/O. 265 f, err = file.Create(ctx, gf.n.path) 266 if err != nil { 267 return errors.E(err, fmt.Sprintf("creating file at %q", gf.n.path)) 268 } 269 gf.ops = &directWrite{ 270 n: gf.n, 271 f: f, 272 w: f.Writer(context.Background()), // TODO: Tie to gf lifetime? 273 off: 0, 274 } 275 return nil 276 } 277 // No existing file, so there are no existing contents. 278 err = nil 279 existing = &bytes.Buffer{} 280 } else { 281 return errors.E(err, fmt.Sprintf("opening file for %q", gf.n.path)) 282 } 283 } 284 tmp, err := ioutil.TempFile("", tmpPattern) 285 if err != nil { 286 // fp was opened for reading, so don't worry about the error on 287 // Close. 288 _ = f.Close(ctx) 289 return errors.E(err, "making temp file") 290 } 291 _, err = io.Copy(tmp, existing) 292 if err != nil { 293 // We're going to report the copy error, so we treat closing as 294 // best-effort. 295 _ = f.Close(ctx) 296 _ = tmp.Close() 297 return errors.E(err, fmt.Sprintf("copying current contents to temp file %q", tmp.Name())) 298 } 299 gf.ops = &tmpIO{n: gf.n, f: tmp} 300 return nil 301 } 302 } 303 304 // lockedFlush flushes writes to the backing write I/O state. The caller must 305 // have gf.mu locked. 306 func (gf *gfile) lockedFlush() (err error) { 307 // We use a background context when flushing as a workaround for handling 308 // interrupted operations, particularly from Go clients. As of Go 1.14, 309 // slow system calls may see more EINTR errors[1]. While most file 310 // operations are automatically retried[2], closing (which results in 311 // flushing) is not[3]. Ultimately, clients may see spurious, confusing 312 // failures calling (*os.File).Close. Given that it is extremely uncommon 313 // for callers to retry, we ignore interrupts to avoid the confusion. The 314 // significant downside is that intentional interruption, e.g. CTRL-C on a 315 // program that is taking too long, is also ignored, so processes can 316 // appear hung. 317 // 318 // TODO: Consider a better way of handling this problem. 319 // 320 // [1] https://go.dev/doc/go1.14#runtime 321 // [2] https://github.com/golang/go/commit/6b420169d798c7ebe733487b56ea5c3fa4aab5ce 322 // [3] https://github.com/golang/go/blob/go1.17.8/src/internal/poll/fd_unix.go#L79-L83 323 ctx := context.Background() 324 if gf.flushed { 325 return nil 326 } 327 defer func() { 328 if err == nil { 329 gf.flushed = true 330 } 331 }() 332 if (gf.flag & int(fuse.O_ANYWRITE)) != 0 { 333 if err = gf.lockedInitOps(ctx); err != nil { 334 return err 335 } 336 } 337 reuseOps, err := gf.ops.Flush(ctx) 338 if err != nil { 339 return err 340 } 341 if !reuseOps { 342 gf.ops = nil 343 } 344 return nil 345 } 346 347 // ioOps handles the underlying I/O operations for a *gfile. Implementations 348 // may directly call base/file or use a temporary file on local disk until 349 // flush. 350 type ioOps interface { 351 Stat(ctx context.Context) (file.Info, error) 352 Read(ctx context.Context, p []byte) (int, error) 353 ReadAt(ctx context.Context, p []byte, off int64) (int, error) 354 WriteAt(ctx context.Context, p []byte, off int64) (int, error) 355 Truncate(ctx context.Context, size int64) error 356 Flush(ctx context.Context) (reuseOps bool, _ error) 357 Close(ctx context.Context) error 358 } 359 360 // directRead implements ioOps. It reads directly using base/file and does not 361 // support writes, e.g. to handle O_RDONLY. 362 type directRead struct { 363 f file.File 364 matcher interface { 365 ioctx.ReaderAt 366 ioctx.Closer 367 } 368 r io.ReadSeeker 369 } 370 371 var _ ioOps = (*directRead)(nil) 372 373 func (ops *directRead) Stat(ctx context.Context) (file.Info, error) { 374 return ops.f.Stat(ctx) 375 } 376 377 func (ops *directRead) Read(ctx context.Context, p []byte) (int, error) { 378 return ops.r.Read(p) 379 } 380 381 func (ops *directRead) ReadAt(ctx context.Context, p []byte, off int64) (_ int, err error) { 382 return ops.matcher.ReadAt(ctx, p, off) 383 } 384 385 func (*directRead) WriteAt(ctx context.Context, p []byte, off int64) (int, error) { 386 return 0, errors.E(errors.Invalid, "writing read-only file") 387 } 388 389 func (*directRead) Truncate(ctx context.Context, size int64) error { 390 return errors.E(errors.Invalid, "cannot truncate read-only file") 391 } 392 393 func (*directRead) Flush(ctx context.Context) (reuseOps bool, _ error) { 394 return true, nil 395 } 396 397 func (ops *directRead) Close(ctx context.Context) error { 398 err := ops.matcher.Close(ctx) 399 errors.CleanUpCtx(ctx, ops.f.Close, &err) 400 return err 401 } 402 403 // directWrite implements ioOps. It writes directly using base/file and does 404 // not support reads, e.g. to handle O_WRONLY|O_TRUNC. 405 type directWrite struct { 406 n *fileNode 407 f file.File 408 w io.Writer 409 off int64 410 } 411 412 var _ ioOps = (*directWrite)(nil) 413 414 func (ops directWrite) Stat(ctx context.Context) (file.Info, error) { 415 return ops.f.Stat(ctx) 416 } 417 418 func (directWrite) Read(ctx context.Context, p []byte) (int, error) { 419 return 0, errors.E(errors.Invalid, "reading write-only file") 420 } 421 422 func (directWrite) ReadAt(ctx context.Context, p []byte, off int64) (int, error) { 423 return 0, errors.E(errors.Invalid, "reading write-only file") 424 } 425 426 func (ops *directWrite) WriteAt(ctx context.Context, p []byte, off int64) (int, error) { 427 if off != ops.off { 428 return 0, errors.E(errors.NotSupported, "non-contiguous write") 429 } 430 n, err := ops.w.Write(p) 431 ops.off += int64(n) 432 return n, err 433 } 434 435 func (ops directWrite) Truncate(ctx context.Context, size int64) error { 436 if ops.off != size { 437 return errors.E(errors.NotSupported, "truncating to %d not supported by direct I/O") 438 } 439 return nil 440 } 441 442 func (ops *directWrite) Flush(ctx context.Context) (reuseOps bool, _ error) { 443 err := ops.f.Close(ctx) 444 ops.n.setFsnodeInfo( 445 ops.n.fsnodeInfo(). 446 WithModTime(time.Now()). 447 WithSize(ops.off), 448 ) 449 // Clear to catch accidental reuse. 450 *ops = directWrite{} 451 return false, err 452 } 453 454 func (ops directWrite) Close(ctx context.Context) error { 455 return ops.f.Close(ctx) 456 } 457 458 // tmpIO implements ioOps. It is backed by a temporary local file, e.g. to 459 // handle O_RDWR. 460 type tmpIO struct { 461 n *fileNode 462 f *os.File // refers to a file in -tmp-dir. 463 } 464 465 var _ ioOps = (*tmpIO)(nil) 466 467 func (ops tmpIO) Stat(_ context.Context) (file.Info, error) { 468 return ops.f.Stat() 469 } 470 471 func (ops tmpIO) Read(_ context.Context, p []byte) (int, error) { 472 return ops.f.Read(p) 473 } 474 475 func (ops tmpIO) ReadAt(_ context.Context, p []byte, off int64) (int, error) { 476 return ops.f.ReadAt(p, off) 477 } 478 479 func (ops tmpIO) WriteAt(_ context.Context, p []byte, off int64) (int, error) { 480 return ops.f.WriteAt(p, off) 481 } 482 483 func (ops tmpIO) Truncate(_ context.Context, size int64) error { 484 return ops.f.Truncate(size) 485 } 486 487 func (ops *tmpIO) Flush(ctx context.Context) (reuseOps bool, err error) { 488 dst, err := file.Create(ctx, ops.n.path) 489 if err != nil { 490 return false, errors.E(err, fmt.Sprintf("creating file %q", ops.n.path)) 491 } 492 defer file.CloseAndReport(ctx, dst, &err) 493 n, err := io.Copy(dst.Writer(ctx), &readerAdapter{r: ops.f}) 494 if err != nil { 495 return false, errors.E( 496 err, 497 fmt.Sprintf("copying from %q to %q", ops.f.Name(), ops.n.path), 498 ) 499 } 500 ops.n.setFsnodeInfo( 501 ops.n.fsnodeInfo(). 502 WithModTime(time.Now()). 503 WithSize(n), 504 ) 505 return true, nil 506 } 507 508 // readerAdapter adapts an io.ReaderAt to be an io.Reader, calling ReadAt and 509 // maintaining the offset for the next Read. 510 type readerAdapter struct { 511 r io.ReaderAt 512 off int64 513 } 514 515 func (a *readerAdapter) Read(p []byte) (int, error) { 516 n, err := a.r.ReadAt(p, a.off) 517 a.off += int64(n) 518 return n, err 519 } 520 521 func (ops *tmpIO) Close(_ context.Context) error { 522 err := ops.f.Close() 523 if errRemove := os.Remove(ops.f.Name()); errRemove != nil && err == nil { 524 err = errors.E(errRemove, "removing tmpIO file") 525 } 526 return err 527 }