github.com/10XDev/rclone@v1.52.3-0.20200626220027-16af9ab76b2a/vfs/read.go (about) 1 package vfs 2 3 import ( 4 "context" 5 "io" 6 "os" 7 "sync" 8 "time" 9 10 "github.com/pkg/errors" 11 "github.com/rclone/rclone/fs" 12 "github.com/rclone/rclone/fs/accounting" 13 "github.com/rclone/rclone/fs/chunkedreader" 14 "github.com/rclone/rclone/fs/hash" 15 ) 16 17 // ReadFileHandle is an open for read file handle on a File 18 type ReadFileHandle struct { 19 baseHandle 20 done func(err error) 21 mu sync.Mutex 22 cond *sync.Cond // cond lock for out of sequence reads 23 closed bool // set if handle has been closed 24 r *accounting.Account 25 readCalled bool // set if read has been called 26 size int64 // size of the object (0 for unknown length) 27 offset int64 // offset of read of o 28 roffset int64 // offset of Read() calls 29 noSeek bool 30 sizeUnknown bool // set if size of source is not known 31 file *File 32 hash *hash.MultiHasher 33 opened bool 34 remote string 35 } 36 37 // Check interfaces 38 var ( 39 _ io.Reader = (*ReadFileHandle)(nil) 40 _ io.ReaderAt = (*ReadFileHandle)(nil) 41 _ io.Seeker = (*ReadFileHandle)(nil) 42 _ io.Closer = (*ReadFileHandle)(nil) 43 ) 44 45 func newReadFileHandle(f *File) (*ReadFileHandle, error) { 46 var mhash *hash.MultiHasher 47 var err error 48 o := f.getObject() 49 if !f.VFS().Opt.NoChecksum { 50 hashes := hash.NewHashSet(o.Fs().Hashes().GetOne()) // just pick one hash 51 mhash, err = hash.NewMultiHasherTypes(hashes) 52 if err != nil { 53 fs.Errorf(o.Fs(), "newReadFileHandle hash error: %v", err) 54 } 55 } 56 57 fh := &ReadFileHandle{ 58 remote: o.Remote(), 59 noSeek: f.VFS().Opt.NoSeek, 60 file: f, 61 hash: mhash, 62 size: nonNegative(o.Size()), 63 sizeUnknown: o.Size() < 0, 64 } 65 fh.cond = sync.NewCond(&fh.mu) 66 return fh, nil 67 } 68 69 // openPending opens the file if there is a pending open 70 // call with the lock held 71 func (fh *ReadFileHandle) openPending() (err error) { 72 if fh.opened { 73 return nil 74 } 75 o := fh.file.getObject() 76 r, err := chunkedreader.New(context.TODO(), o, int64(fh.file.VFS().Opt.ChunkSize), int64(fh.file.VFS().Opt.ChunkSizeLimit)).Open() 77 if err != nil { 78 return err 79 } 80 tr := accounting.GlobalStats().NewTransfer(o) 81 fh.done = tr.Done 82 fh.r = tr.Account(r).WithBuffer() // account the transfer 83 fh.opened = true 84 85 return nil 86 } 87 88 // String converts it to printable 89 func (fh *ReadFileHandle) String() string { 90 if fh == nil { 91 return "<nil *ReadFileHandle>" 92 } 93 fh.mu.Lock() 94 defer fh.mu.Unlock() 95 if fh.file == nil { 96 return "<nil *ReadFileHandle.file>" 97 } 98 return fh.file.String() + " (r)" 99 } 100 101 // Node returns the Node assocuated with this - satisfies Noder interface 102 func (fh *ReadFileHandle) Node() Node { 103 fh.mu.Lock() 104 defer fh.mu.Unlock() 105 return fh.file 106 } 107 108 // seek to a new offset 109 // 110 // if reopen is true, then we won't attempt to use an io.Seeker interface 111 // 112 // Must be called with fh.mu held 113 func (fh *ReadFileHandle) seek(offset int64, reopen bool) (err error) { 114 if fh.noSeek { 115 return ESPIPE 116 } 117 fh.hash = nil 118 if !reopen { 119 ar := fh.r.GetAsyncReader() 120 // try to fulfill the seek with buffer discard 121 if ar != nil && ar.SkipBytes(int(offset-fh.offset)) { 122 fh.offset = offset 123 return nil 124 } 125 } 126 fh.r.StopBuffering() // stop the background reading first 127 oldReader := fh.r.GetReader() 128 r, ok := oldReader.(*chunkedreader.ChunkedReader) 129 if !ok { 130 fs.Logf(fh.remote, "ReadFileHandle.Read expected reader to be a ChunkedReader, got %T", oldReader) 131 reopen = true 132 } 133 if !reopen { 134 fs.Debugf(fh.remote, "ReadFileHandle.seek from %d to %d (fs.RangeSeeker)", fh.offset, offset) 135 _, err = r.RangeSeek(context.TODO(), offset, io.SeekStart, -1) 136 if err != nil { 137 fs.Debugf(fh.remote, "ReadFileHandle.Read fs.RangeSeeker failed: %v", err) 138 return err 139 } 140 } else { 141 fs.Debugf(fh.remote, "ReadFileHandle.seek from %d to %d", fh.offset, offset) 142 // close old one 143 err = oldReader.Close() 144 if err != nil { 145 fs.Debugf(fh.remote, "ReadFileHandle.Read seek close old failed: %v", err) 146 } 147 // re-open with a seek 148 o := fh.file.getObject() 149 r = chunkedreader.New(context.TODO(), o, int64(fh.file.VFS().Opt.ChunkSize), int64(fh.file.VFS().Opt.ChunkSizeLimit)) 150 _, err := r.Seek(offset, 0) 151 if err != nil { 152 fs.Debugf(fh.remote, "ReadFileHandle.Read seek failed: %v", err) 153 return err 154 } 155 r, err = r.Open() 156 if err != nil { 157 fs.Debugf(fh.remote, "ReadFileHandle.Read seek failed: %v", err) 158 return err 159 } 160 } 161 fh.r.UpdateReader(r) 162 fh.offset = offset 163 return nil 164 } 165 166 // Seek the file - returns ESPIPE if seeking isn't possible 167 func (fh *ReadFileHandle) Seek(offset int64, whence int) (n int64, err error) { 168 fh.mu.Lock() 169 defer fh.mu.Unlock() 170 if fh.noSeek { 171 return 0, ESPIPE 172 } 173 size := fh.size 174 switch whence { 175 case io.SeekStart: 176 fh.roffset = 0 177 case io.SeekEnd: 178 fh.roffset = size 179 } 180 fh.roffset += offset 181 // we don't check the offset - the next Read will 182 return fh.roffset, nil 183 } 184 185 // ReadAt reads len(p) bytes into p starting at offset off in the 186 // underlying input source. It returns the number of bytes read (0 <= 187 // n <= len(p)) and any error encountered. 188 // 189 // When ReadAt returns n < len(p), it returns a non-nil error 190 // explaining why more bytes were not returned. In this respect, 191 // ReadAt is stricter than Read. 192 // 193 // Even if ReadAt returns n < len(p), it may use all of p as scratch 194 // space during the call. If some data is available but not len(p) 195 // bytes, ReadAt blocks until either all the data is available or an 196 // error occurs. In this respect ReadAt is different from Read. 197 // 198 // If the n = len(p) bytes returned by ReadAt are at the end of the 199 // input source, ReadAt may return either err == EOF or err == nil. 200 // 201 // If ReadAt is reading from an input source with a seek offset, 202 // ReadAt should not affect nor be affected by the underlying seek 203 // offset. 204 // 205 // Clients of ReadAt can execute parallel ReadAt calls on the same 206 // input source. 207 // 208 // Implementations must not retain p. 209 func (fh *ReadFileHandle) ReadAt(p []byte, off int64) (n int, err error) { 210 fh.mu.Lock() 211 defer fh.mu.Unlock() 212 return fh.readAt(p, off) 213 } 214 215 // This waits for *poff to equal off or aborts after the timeout. 216 // 217 // Waits here potentially affect all seeks so need to keep them short 218 // 219 // Call with fh.mu Locked 220 func waitSequential(what string, remote string, cond *sync.Cond, maxWait time.Duration, poff *int64, off int64) { 221 var ( 222 timeout = time.NewTimer(maxWait) 223 done = make(chan struct{}) 224 abort = false 225 ) 226 go func() { 227 select { 228 case <-timeout.C: 229 // take the lock to make sure that cond.Wait() is called before 230 // cond.Broadcast. NB cond.L == mu 231 cond.L.Lock() 232 // set abort flag and give all the waiting goroutines a kick on timeout 233 abort = true 234 fs.Debugf(remote, "aborting in-sequence %s wait, off=%d", what, off) 235 cond.Broadcast() 236 cond.L.Unlock() 237 case <-done: 238 } 239 }() 240 for *poff != off && !abort { 241 fs.Debugf(remote, "waiting for in-sequence %s to %d for %v", what, off, maxWait) 242 cond.Wait() 243 } 244 // tidy up end timer 245 close(done) 246 timeout.Stop() 247 if *poff != off { 248 fs.Debugf(remote, "failed to wait for in-sequence %s to %d", what, off) 249 } 250 } 251 252 // Implementation of ReadAt - call with lock held 253 func (fh *ReadFileHandle) readAt(p []byte, off int64) (n int, err error) { 254 // defer log.Trace(fh.remote, "p[%d], off=%d", len(p), off)("n=%d, err=%v", &n, &err) 255 err = fh.openPending() // FIXME pending open could be more efficient in the presence of seek (and retries) 256 if err != nil { 257 return 0, err 258 } 259 // fs.Debugf(fh.remote, "ReadFileHandle.Read size %d offset %d", reqSize, off) 260 if fh.closed { 261 fs.Errorf(fh.remote, "ReadFileHandle.Read error: %v", EBADF) 262 return 0, ECLOSED 263 } 264 maxBuf := 1024 * 1024 265 if len(p) < maxBuf { 266 maxBuf = len(p) 267 } 268 if gap := off - fh.offset; gap > 0 && gap < int64(8*maxBuf) { 269 waitSequential("read", fh.remote, fh.cond, fh.file.VFS().Opt.ReadWait, &fh.offset, off) 270 } 271 doSeek := off != fh.offset 272 if doSeek && fh.noSeek { 273 return 0, ESPIPE 274 } 275 var newOffset int64 276 retries := 0 277 reqSize := len(p) 278 doReopen := false 279 for { 280 if doSeek { 281 // Are we attempting to seek beyond the end of the 282 // file - if so just return EOF leaving the underlying 283 // file in an unchanged state. 284 if off >= fh.size { 285 fs.Debugf(fh.remote, "ReadFileHandle.Read attempt to read beyond end of file: %d > %d", off, fh.size) 286 return 0, io.EOF 287 } 288 // Otherwise do the seek 289 err = fh.seek(off, doReopen) 290 } else { 291 err = nil 292 } 293 if err == nil { 294 if reqSize > 0 { 295 fh.readCalled = true 296 } 297 n, err = io.ReadFull(fh.r, p) 298 newOffset = fh.offset + int64(n) 299 // if err == nil && rand.Intn(10) == 0 { 300 // err = errors.New("random error") 301 // } 302 if err == nil { 303 break 304 } else if (err == io.ErrUnexpectedEOF || err == io.EOF) && (newOffset == fh.size || fh.sizeUnknown) { 305 if fh.sizeUnknown { 306 // size is now known since we have read to the end 307 fh.sizeUnknown = false 308 fh.size = newOffset 309 } 310 // Have read to end of file - reset error 311 err = nil 312 break 313 } 314 } 315 if retries >= fs.Config.LowLevelRetries { 316 break 317 } 318 retries++ 319 fs.Errorf(fh.remote, "ReadFileHandle.Read error: low level retry %d/%d: %v", retries, fs.Config.LowLevelRetries, err) 320 doSeek = true 321 doReopen = true 322 } 323 if err != nil { 324 fs.Errorf(fh.remote, "ReadFileHandle.Read error: %v", err) 325 } else { 326 fh.offset = newOffset 327 // fs.Debugf(fh.remote, "ReadFileHandle.Read OK") 328 329 if fh.hash != nil { 330 _, err = fh.hash.Write(p[:n]) 331 if err != nil { 332 fs.Errorf(fh.remote, "ReadFileHandle.Read HashError: %v", err) 333 return 0, err 334 } 335 } 336 337 // If we have no error and we didn't fill the buffer, must be EOF 338 if n != len(p) { 339 err = io.EOF 340 } 341 } 342 fh.cond.Broadcast() // wake everyone up waiting for an in-sequence read 343 return n, err 344 } 345 346 func (fh *ReadFileHandle) checkHash() error { 347 if fh.hash == nil || !fh.readCalled || fh.offset < fh.size { 348 return nil 349 } 350 351 o := fh.file.getObject() 352 for hashType, dstSum := range fh.hash.Sums() { 353 srcSum, err := o.Hash(context.TODO(), hashType) 354 if err != nil { 355 if os.IsNotExist(errors.Cause(err)) { 356 // if it was file not found then at 357 // this point we don't care any more 358 continue 359 } 360 return err 361 } 362 if !hash.Equals(dstSum, srcSum) { 363 return errors.Errorf("corrupted on transfer: %v hash differ %q vs %q", hashType, dstSum, srcSum) 364 } 365 } 366 367 return nil 368 } 369 370 // Read reads up to len(p) bytes into p. It returns the number of bytes read (0 371 // <= n <= len(p)) and any error encountered. Even if Read returns n < len(p), 372 // it may use all of p as scratch space during the call. If some data is 373 // available but not len(p) bytes, Read conventionally returns what is 374 // available instead of waiting for more. 375 // 376 // When Read encounters an error or end-of-file condition after successfully 377 // reading n > 0 bytes, it returns the number of bytes read. It may return the 378 // (non-nil) error from the same call or return the error (and n == 0) from a 379 // subsequent call. An instance of this general case is that a Reader returning 380 // a non-zero number of bytes at the end of the input stream may return either 381 // err == EOF or err == nil. The next Read should return 0, EOF. 382 // 383 // Callers should always process the n > 0 bytes returned before considering 384 // the error err. Doing so correctly handles I/O errors that happen after 385 // reading some bytes and also both of the allowed EOF behaviors. 386 // 387 // Implementations of Read are discouraged from returning a zero byte count 388 // with a nil error, except when len(p) == 0. Callers should treat a return of 389 // 0 and nil as indicating that nothing happened; in particular it does not 390 // indicate EOF. 391 // 392 // Implementations must not retain p. 393 func (fh *ReadFileHandle) Read(p []byte) (n int, err error) { 394 fh.mu.Lock() 395 defer fh.mu.Unlock() 396 if fh.roffset >= fh.size && !fh.sizeUnknown { 397 return 0, io.EOF 398 } 399 n, err = fh.readAt(p, fh.roffset) 400 fh.roffset += int64(n) 401 return n, err 402 } 403 404 // close the file handle returning EBADF if it has been 405 // closed already. 406 // 407 // Must be called with fh.mu held 408 func (fh *ReadFileHandle) close() error { 409 if fh.closed { 410 return ECLOSED 411 } 412 fh.closed = true 413 414 if fh.opened { 415 var err error 416 defer func() { 417 fh.done(err) 418 }() 419 // Close first so that we have hashes 420 err = fh.r.Close() 421 if err != nil { 422 return err 423 } 424 // Now check the hash 425 err = fh.checkHash() 426 if err != nil { 427 return err 428 } 429 } 430 return nil 431 } 432 433 // Close closes the file 434 func (fh *ReadFileHandle) Close() error { 435 fh.mu.Lock() 436 defer fh.mu.Unlock() 437 return fh.close() 438 } 439 440 // Flush is called each time the file or directory is closed. 441 // Because there can be multiple file descriptors referring to a 442 // single opened file, Flush can be called multiple times. 443 func (fh *ReadFileHandle) Flush() error { 444 fh.mu.Lock() 445 defer fh.mu.Unlock() 446 if !fh.opened { 447 return nil 448 } 449 // fs.Debugf(fh.remote, "ReadFileHandle.Flush") 450 451 if err := fh.checkHash(); err != nil { 452 fs.Errorf(fh.remote, "ReadFileHandle.Flush error: %v", err) 453 return err 454 } 455 456 // fs.Debugf(fh.remote, "ReadFileHandle.Flush OK") 457 return nil 458 } 459 460 // Release is called when we are finished with the file handle 461 // 462 // It isn't called directly from userspace so the error is ignored by 463 // the kernel 464 func (fh *ReadFileHandle) Release() error { 465 fh.mu.Lock() 466 defer fh.mu.Unlock() 467 if !fh.opened { 468 return nil 469 } 470 if fh.closed { 471 fs.Debugf(fh.remote, "ReadFileHandle.Release nothing to do") 472 return nil 473 } 474 fs.Debugf(fh.remote, "ReadFileHandle.Release closing") 475 err := fh.close() 476 if err != nil { 477 fs.Errorf(fh.remote, "ReadFileHandle.Release error: %v", err) 478 } else { 479 // fs.Debugf(fh.remote, "ReadFileHandle.Release OK") 480 } 481 return err 482 } 483 484 // Size returns the size of the underlying file 485 func (fh *ReadFileHandle) Size() int64 { 486 fh.mu.Lock() 487 defer fh.mu.Unlock() 488 return fh.size 489 } 490 491 // Stat returns info about the file 492 func (fh *ReadFileHandle) Stat() (os.FileInfo, error) { 493 fh.mu.Lock() 494 defer fh.mu.Unlock() 495 return fh.file, nil 496 }