github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/vfs/read.go (about) 1 package vfs 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io" 8 "os" 9 "sync" 10 "sync/atomic" 11 "time" 12 13 "github.com/rclone/rclone/fs" 14 "github.com/rclone/rclone/fs/accounting" 15 "github.com/rclone/rclone/fs/chunkedreader" 16 "github.com/rclone/rclone/fs/hash" 17 ) 18 19 // ReadFileHandle is an open for read file handle on a File 20 type ReadFileHandle struct { 21 baseHandle 22 done func(ctx context.Context, err error) 23 mu sync.Mutex 24 cond sync.Cond // cond lock for out of sequence reads 25 r *accounting.Account 26 size int64 // size of the object (0 for unknown length) 27 offset int64 // offset of read of o 28 roffset int64 // offset of Read() calls 29 file *File 30 hash *hash.MultiHasher 31 remote string 32 closed bool // set if handle has been closed 33 readCalled bool // set if read has been called 34 noSeek bool 35 sizeUnknown bool // set if size of source is not known 36 opened bool 37 } 38 39 // Check interfaces 40 var ( 41 _ io.Reader = (*ReadFileHandle)(nil) 42 _ io.ReaderAt = (*ReadFileHandle)(nil) 43 _ io.Seeker = (*ReadFileHandle)(nil) 44 _ io.Closer = (*ReadFileHandle)(nil) 45 ) 46 47 func newReadFileHandle(f *File) (*ReadFileHandle, error) { 48 var mhash *hash.MultiHasher 49 var err error 50 o := f.getObject() 51 if !f.VFS().Opt.NoChecksum { 52 hashes := hash.NewHashSet(o.Fs().Hashes().GetOne()) // just pick one hash 53 mhash, err = hash.NewMultiHasherTypes(hashes) 54 if err != nil { 55 fs.Errorf(o.Fs(), "newReadFileHandle hash error: %v", err) 56 } 57 } 58 59 fh := &ReadFileHandle{ 60 remote: o.Remote(), 61 noSeek: f.VFS().Opt.NoSeek, 62 file: f, 63 hash: mhash, 64 size: nonNegative(o.Size()), 65 sizeUnknown: o.Size() < 0, 66 } 67 fh.cond = sync.Cond{L: &fh.mu} 68 return fh, nil 69 } 70 71 // openPending opens the file if there is a pending open 72 // call with the lock held 73 func (fh *ReadFileHandle) openPending() (err error) { 74 if fh.opened { 75 return nil 76 } 77 o := fh.file.getObject() 78 r, err := chunkedreader.New(context.TODO(), o, int64(fh.file.VFS().Opt.ChunkSize), int64(fh.file.VFS().Opt.ChunkSizeLimit)).Open() 79 if err != nil { 80 return err 81 } 82 tr := accounting.GlobalStats().NewTransfer(o, nil) 83 fh.done = tr.Done 84 fh.r = tr.Account(context.TODO(), r).WithBuffer() // account the transfer 85 fh.opened = true 86 87 return nil 88 } 89 90 // String converts it to printable 91 func (fh *ReadFileHandle) String() string { 92 if fh == nil { 93 return "<nil *ReadFileHandle>" 94 } 95 fh.mu.Lock() 96 defer fh.mu.Unlock() 97 if fh.file == nil { 98 return "<nil *ReadFileHandle.file>" 99 } 100 return fh.file.String() + " (r)" 101 } 102 103 // Node returns the Node associated with this - satisfies Noder interface 104 func (fh *ReadFileHandle) Node() Node { 105 fh.mu.Lock() 106 defer fh.mu.Unlock() 107 return fh.file 108 } 109 110 // seek to a new offset 111 // 112 // if reopen is true, then we won't attempt to use an io.Seeker interface 113 // 114 // Must be called with fh.mu held 115 func (fh *ReadFileHandle) seek(offset int64, reopen bool) (err error) { 116 if fh.noSeek { 117 return ESPIPE 118 } 119 fh.hash = nil 120 if !reopen { 121 ar := fh.r.GetAsyncReader() 122 // try to fulfill the seek with buffer discard 123 if ar != nil && ar.SkipBytes(int(offset-fh.offset)) { 124 fh.offset = offset 125 return nil 126 } 127 } 128 fh.r.StopBuffering() // stop the background reading first 129 oldReader := fh.r.GetReader() 130 r, ok := oldReader.(*chunkedreader.ChunkedReader) 131 if !ok { 132 fs.Logf(fh.remote, "ReadFileHandle.Read expected reader to be a ChunkedReader, got %T", oldReader) 133 reopen = true 134 } 135 if !reopen { 136 fs.Debugf(fh.remote, "ReadFileHandle.seek from %d to %d (fs.RangeSeeker)", fh.offset, offset) 137 _, err = r.RangeSeek(context.TODO(), offset, io.SeekStart, -1) 138 if err != nil { 139 fs.Debugf(fh.remote, "ReadFileHandle.Read fs.RangeSeeker failed: %v", err) 140 return err 141 } 142 } else { 143 fs.Debugf(fh.remote, "ReadFileHandle.seek from %d to %d", fh.offset, offset) 144 // close old one 145 err = oldReader.Close() 146 if err != nil { 147 fs.Debugf(fh.remote, "ReadFileHandle.Read seek close old failed: %v", err) 148 } 149 // re-open with a seek 150 o := fh.file.getObject() 151 r = chunkedreader.New(context.TODO(), o, int64(fh.file.VFS().Opt.ChunkSize), int64(fh.file.VFS().Opt.ChunkSizeLimit)) 152 _, err := r.Seek(offset, 0) 153 if err != nil { 154 fs.Debugf(fh.remote, "ReadFileHandle.Read seek failed: %v", err) 155 return err 156 } 157 r, err = r.Open() 158 if err != nil { 159 fs.Debugf(fh.remote, "ReadFileHandle.Read seek failed: %v", err) 160 return err 161 } 162 } 163 fh.r.UpdateReader(context.TODO(), r) 164 fh.offset = offset 165 return nil 166 } 167 168 // Seek the file - returns ESPIPE if seeking isn't possible 169 func (fh *ReadFileHandle) Seek(offset int64, whence int) (n int64, err error) { 170 fh.mu.Lock() 171 defer fh.mu.Unlock() 172 if fh.noSeek { 173 return 0, ESPIPE 174 } 175 size := fh.size 176 switch whence { 177 case io.SeekStart: 178 fh.roffset = 0 179 case io.SeekEnd: 180 fh.roffset = size 181 } 182 fh.roffset += offset 183 // we don't check the offset - the next Read will 184 return fh.roffset, nil 185 } 186 187 // ReadAt reads len(p) bytes into p starting at offset off in the 188 // underlying input source. It returns the number of bytes read (0 <= 189 // n <= len(p)) and any error encountered. 190 // 191 // When ReadAt returns n < len(p), it returns a non-nil error 192 // explaining why more bytes were not returned. In this respect, 193 // ReadAt is stricter than Read. 194 // 195 // Even if ReadAt returns n < len(p), it may use all of p as scratch 196 // space during the call. If some data is available but not len(p) 197 // bytes, ReadAt blocks until either all the data is available or an 198 // error occurs. In this respect ReadAt is different from Read. 199 // 200 // If the n = len(p) bytes returned by ReadAt are at the end of the 201 // input source, ReadAt may return either err == EOF or err == nil. 202 // 203 // If ReadAt is reading from an input source with a seek offset, 204 // ReadAt should not affect nor be affected by the underlying seek 205 // offset. 206 // 207 // Clients of ReadAt can execute parallel ReadAt calls on the same 208 // input source. 209 // 210 // Implementations must not retain p. 211 func (fh *ReadFileHandle) ReadAt(p []byte, off int64) (n int, err error) { 212 fh.mu.Lock() 213 defer fh.mu.Unlock() 214 return fh.readAt(p, off) 215 } 216 217 // This waits for *poff to equal off or aborts after the timeout. 218 // 219 // Waits here potentially affect all seeks so need to keep them short. 220 // 221 // Call with fh.mu Locked 222 func waitSequential(what string, remote string, cond *sync.Cond, maxWait time.Duration, poff *int64, off int64) { 223 var ( 224 timeout = time.NewTimer(maxWait) 225 done = make(chan struct{}) 226 abort atomic.Int32 227 ) 228 go func() { 229 select { 230 case <-timeout.C: 231 // take the lock to make sure that cond.Wait() is called before 232 // cond.Broadcast. NB cond.L == mu 233 cond.L.Lock() 234 // set abort flag and give all the waiting goroutines a kick on timeout 235 abort.Store(1) 236 fs.Debugf(remote, "aborting in-sequence %s wait, off=%d", what, off) 237 cond.Broadcast() 238 cond.L.Unlock() 239 case <-done: 240 } 241 }() 242 for *poff != off && abort.Load() == 0 { 243 fs.Debugf(remote, "waiting for in-sequence %s to %d for %v", what, off, maxWait) 244 cond.Wait() 245 } 246 // tidy up end timer 247 close(done) 248 timeout.Stop() 249 if *poff != off { 250 fs.Debugf(remote, "failed to wait for in-sequence %s to %d", what, off) 251 } 252 } 253 254 // Implementation of ReadAt - call with lock held 255 func (fh *ReadFileHandle) readAt(p []byte, off int64) (n int, err error) { 256 // defer log.Trace(fh.remote, "p[%d], off=%d", len(p), off)("n=%d, err=%v", &n, &err) 257 err = fh.openPending() // FIXME pending open could be more efficient in the presence of seek (and retries) 258 if err != nil { 259 return 0, err 260 } 261 // fs.Debugf(fh.remote, "ReadFileHandle.Read size %d offset %d", reqSize, off) 262 if fh.closed { 263 fs.Errorf(fh.remote, "ReadFileHandle.Read error: %v", EBADF) 264 return 0, ECLOSED 265 } 266 maxBuf := 1024 * 1024 267 if len(p) < maxBuf { 268 maxBuf = len(p) 269 } 270 if gap := off - fh.offset; gap > 0 && gap < int64(8*maxBuf) { 271 waitSequential("read", fh.remote, &fh.cond, fh.file.VFS().Opt.ReadWait, &fh.offset, off) 272 } 273 doSeek := off != fh.offset 274 if doSeek && fh.noSeek { 275 return 0, ESPIPE 276 } 277 var newOffset int64 278 retries := 0 279 reqSize := len(p) 280 doReopen := false 281 lowLevelRetries := fs.GetConfig(context.TODO()).LowLevelRetries 282 for { 283 if doSeek { 284 // Are we attempting to seek beyond the end of the 285 // file - if so just return EOF leaving the underlying 286 // file in an unchanged state. 287 if off >= fh.size { 288 fs.Debugf(fh.remote, "ReadFileHandle.Read attempt to read beyond end of file: %d > %d", off, fh.size) 289 return 0, io.EOF 290 } 291 // Otherwise do the seek 292 err = fh.seek(off, doReopen) 293 } else { 294 err = nil 295 } 296 if err == nil { 297 if reqSize > 0 { 298 fh.readCalled = true 299 } 300 n, err = io.ReadFull(fh.r, p) 301 newOffset = fh.offset + int64(n) 302 // if err == nil && rand.Intn(10) == 0 { 303 // err = errors.New("random error") 304 // } 305 if err == nil { 306 break 307 } else if (err == io.ErrUnexpectedEOF || err == io.EOF) && (newOffset == fh.size || fh.sizeUnknown) { 308 if fh.sizeUnknown { 309 // size is now known since we have read to the end 310 fh.sizeUnknown = false 311 fh.size = newOffset 312 } 313 // Have read to end of file - reset error 314 err = nil 315 break 316 } 317 } 318 if retries >= lowLevelRetries { 319 break 320 } 321 retries++ 322 fs.Errorf(fh.remote, "ReadFileHandle.Read error: low level retry %d/%d: %v", retries, lowLevelRetries, err) 323 doSeek = true 324 doReopen = true 325 } 326 if err != nil { 327 fs.Errorf(fh.remote, "ReadFileHandle.Read error: %v", err) 328 } else { 329 fh.offset = newOffset 330 // fs.Debugf(fh.remote, "ReadFileHandle.Read OK") 331 332 if fh.hash != nil { 333 _, err = fh.hash.Write(p[:n]) 334 if err != nil { 335 fs.Errorf(fh.remote, "ReadFileHandle.Read HashError: %v", err) 336 return 0, err 337 } 338 } 339 340 // If we have no error and we didn't fill the buffer, must be EOF 341 if n != len(p) { 342 err = io.EOF 343 } 344 } 345 fh.cond.Broadcast() // wake everyone up waiting for an in-sequence read 346 return n, err 347 } 348 349 func (fh *ReadFileHandle) checkHash() error { 350 if fh.hash == nil || !fh.readCalled || fh.offset < fh.size { 351 return nil 352 } 353 354 o := fh.file.getObject() 355 for hashType, dstSum := range fh.hash.Sums() { 356 srcSum, err := o.Hash(context.TODO(), hashType) 357 if err != nil { 358 if errors.Is(err, os.ErrNotExist) { 359 // if it was file not found then at 360 // this point we don't care any more 361 continue 362 } 363 return err 364 } 365 if !hash.Equals(dstSum, srcSum) { 366 return fmt.Errorf("corrupted on transfer: %v hashes differ src %q vs dst %q", hashType, srcSum, dstSum) 367 } 368 } 369 370 return nil 371 } 372 373 // Read reads up to len(p) bytes into p. It returns the number of bytes read (0 374 // <= n <= len(p)) and any error encountered. Even if Read returns n < len(p), 375 // it may use all of p as scratch space during the call. If some data is 376 // available but not len(p) bytes, Read conventionally returns what is 377 // available instead of waiting for more. 378 // 379 // When Read encounters an error or end-of-file condition after successfully 380 // reading n > 0 bytes, it returns the number of bytes read. It may return the 381 // (non-nil) error from the same call or return the error (and n == 0) from a 382 // subsequent call. An instance of this general case is that a Reader returning 383 // a non-zero number of bytes at the end of the input stream may return either 384 // err == EOF or err == nil. The next Read should return 0, EOF. 385 // 386 // Callers should always process the n > 0 bytes returned before considering 387 // the error err. Doing so correctly handles I/O errors that happen after 388 // reading some bytes and also both of the allowed EOF behaviors. 389 // 390 // Implementations of Read are discouraged from returning a zero byte count 391 // with a nil error, except when len(p) == 0. Callers should treat a return of 392 // 0 and nil as indicating that nothing happened; in particular it does not 393 // indicate EOF. 394 // 395 // Implementations must not retain p. 396 func (fh *ReadFileHandle) Read(p []byte) (n int, err error) { 397 fh.mu.Lock() 398 defer fh.mu.Unlock() 399 if fh.roffset >= fh.size && !fh.sizeUnknown { 400 return 0, io.EOF 401 } 402 n, err = fh.readAt(p, fh.roffset) 403 fh.roffset += int64(n) 404 return n, err 405 } 406 407 // close the file handle returning EBADF if it has been 408 // closed already. 409 // 410 // Must be called with fh.mu held 411 func (fh *ReadFileHandle) close() error { 412 if fh.closed { 413 return ECLOSED 414 } 415 fh.closed = true 416 417 if fh.opened { 418 var err error 419 defer func() { 420 fh.done(context.TODO(), err) 421 }() 422 // Close first so that we have hashes 423 err = fh.r.Close() 424 if err != nil { 425 return err 426 } 427 // Now check the hash 428 err = fh.checkHash() 429 if err != nil { 430 return err 431 } 432 } 433 return nil 434 } 435 436 // Close closes the file 437 func (fh *ReadFileHandle) Close() error { 438 fh.mu.Lock() 439 defer fh.mu.Unlock() 440 return fh.close() 441 } 442 443 // Flush is called each time the file or directory is closed. 444 // Because there can be multiple file descriptors referring to a 445 // single opened file, Flush can be called multiple times. 446 func (fh *ReadFileHandle) Flush() error { 447 fh.mu.Lock() 448 defer fh.mu.Unlock() 449 if !fh.opened { 450 return nil 451 } 452 // fs.Debugf(fh.remote, "ReadFileHandle.Flush") 453 454 if err := fh.checkHash(); err != nil { 455 fs.Errorf(fh.remote, "ReadFileHandle.Flush error: %v", err) 456 return err 457 } 458 459 // fs.Debugf(fh.remote, "ReadFileHandle.Flush OK") 460 return nil 461 } 462 463 // Release is called when we are finished with the file handle 464 // 465 // It isn't called directly from userspace so the error is ignored by 466 // the kernel 467 func (fh *ReadFileHandle) Release() error { 468 fh.mu.Lock() 469 defer fh.mu.Unlock() 470 if !fh.opened { 471 return nil 472 } 473 if fh.closed { 474 fs.Debugf(fh.remote, "ReadFileHandle.Release nothing to do") 475 return nil 476 } 477 fs.Debugf(fh.remote, "ReadFileHandle.Release closing") 478 err := fh.close() 479 if err != nil { 480 fs.Errorf(fh.remote, "ReadFileHandle.Release error: %v", err) 481 //} else { 482 // fs.Debugf(fh.remote, "ReadFileHandle.Release OK") 483 } 484 return err 485 } 486 487 // Name returns the name of the file from the underlying Object. 488 func (fh *ReadFileHandle) Name() string { 489 return fh.file.String() 490 } 491 492 // Size returns the size of the underlying file 493 func (fh *ReadFileHandle) Size() int64 { 494 fh.mu.Lock() 495 defer fh.mu.Unlock() 496 return fh.size 497 } 498 499 // Stat returns info about the file 500 func (fh *ReadFileHandle) Stat() (os.FileInfo, error) { 501 fh.mu.Lock() 502 defer fh.mu.Unlock() 503 return fh.file, nil 504 }