github.com/xhghs/rclone@v1.51.1-0.20200430155106-e186a28cced8/vfs/read.go

package vfs

import (
    "context"
    "io"
    "os"
    "sync"
    "sync/atomic"
    "time"

    "github.com/pkg/errors"
    "github.com/rclone/rclone/fs"
    "github.com/rclone/rclone/fs/accounting"
    "github.com/rclone/rclone/fs/chunkedreader"
    "github.com/rclone/rclone/fs/hash"
)

// ReadFileHandle is an open-for-read file handle on a File
type ReadFileHandle struct {
    baseHandle
    done        func(err error)
    mu          sync.Mutex
    cond        *sync.Cond // cond lock for out of sequence reads
    closed      bool       // set if handle has been closed
    r           *accounting.Account
    readCalled  bool  // set if read has been called
    size        int64 // size of the object (0 for unknown length)
    offset      int64 // offset the underlying reader has reached in o
    roffset     int64 // offset of Read() calls
    noSeek      bool  // set if seeking is not allowed
    sizeUnknown bool  // set if size of source is not known
    file        *File
    hash        *hash.MultiHasher
    opened      bool
    remote      string
}

// Check interfaces
var (
    _ io.Reader   = (*ReadFileHandle)(nil)
    _ io.ReaderAt = (*ReadFileHandle)(nil)
    _ io.Seeker   = (*ReadFileHandle)(nil)
    _ io.Closer   = (*ReadFileHandle)(nil)
)

func newReadFileHandle(f *File) (*ReadFileHandle, error) {
    var mhash *hash.MultiHasher
    var err error
    o := f.getObject()
    if !f.d.vfs.Opt.NoChecksum {
        hashes := hash.NewHashSet(o.Fs().Hashes().GetOne()) // just pick one hash
        mhash, err = hash.NewMultiHasherTypes(hashes)
        if err != nil {
            fs.Errorf(o.Fs(), "newReadFileHandle hash error: %v", err)
        }
    }

    fh := &ReadFileHandle{
        remote:      o.Remote(),
        noSeek:      f.d.vfs.Opt.NoSeek,
        file:        f,
        hash:        mhash,
        size:        nonNegative(o.Size()),
        sizeUnknown: o.Size() < 0,
    }
    fh.cond = sync.NewCond(&fh.mu)
    return fh, nil
}

// openPending opens the file if there is a pending open
//
// Call with the lock held
func (fh *ReadFileHandle) openPending() (err error) {
    if fh.opened {
        return nil
    }
    o := fh.file.getObject()
    r, err := chunkedreader.New(context.TODO(), o, int64(fh.file.d.vfs.Opt.ChunkSize), int64(fh.file.d.vfs.Opt.ChunkSizeLimit)).Open()
    if err != nil {
        return err
    }
    tr := accounting.GlobalStats().NewTransfer(o)
    fh.done = tr.Done
    fh.r = tr.Account(r).WithBuffer() // account the transfer
    fh.opened = true

    return nil
}

// String converts it to printable
func (fh *ReadFileHandle) String() string {
    if fh == nil {
        return "<nil *ReadFileHandle>"
    }
    fh.mu.Lock()
    defer fh.mu.Unlock()
    if fh.file == nil {
        return "<nil *ReadFileHandle.file>"
    }
    return fh.file.String() + " (r)"
}

// Node returns the Node associated with this - satisfies Noder interface
func (fh *ReadFileHandle) Node() Node {
    fh.mu.Lock()
    defer fh.mu.Unlock()
    return fh.file
}

// seek to a new offset
//
// if reopen is true, then we won't attempt to use an io.Seeker interface
//
// Must be called with fh.mu held
func (fh *ReadFileHandle) seek(offset int64, reopen bool) (err error) {
    if fh.noSeek {
        return ESPIPE
    }
    fh.hash = nil
    if !reopen {
        ar := fh.r.GetAsyncReader()
        // try to fulfill the seek with buffer discard
        if ar != nil && ar.SkipBytes(int(offset-fh.offset)) {
            fh.offset = offset
            return nil
        }
    }
    fh.r.StopBuffering() // stop the background reading first
    oldReader := fh.r.GetReader()
    r, ok := oldReader.(*chunkedreader.ChunkedReader)
    if !ok {
        fs.Logf(fh.remote, "ReadFileHandle.Read expected reader to be a ChunkedReader, got %T", oldReader)
        reopen = true
    }
    if !reopen {
        fs.Debugf(fh.remote, "ReadFileHandle.seek from %d to %d (fs.RangeSeeker)", fh.offset, offset)
        _, err = r.RangeSeek(context.TODO(), offset, io.SeekStart, -1)
        if err != nil {
            fs.Debugf(fh.remote, "ReadFileHandle.Read fs.RangeSeeker failed: %v", err)
            return err
        }
    } else {
        fs.Debugf(fh.remote, "ReadFileHandle.seek from %d to %d", fh.offset, offset)
        // close old one
        err = oldReader.Close()
        if err != nil {
            fs.Debugf(fh.remote, "ReadFileHandle.Read seek close old failed: %v", err)
        }
        // re-open with a seek
        o := fh.file.getObject()
        r = chunkedreader.New(context.TODO(), o, int64(fh.file.d.vfs.Opt.ChunkSize), int64(fh.file.d.vfs.Opt.ChunkSizeLimit))
        _, err := r.Seek(offset, io.SeekStart)
        if err != nil {
            fs.Debugf(fh.remote, "ReadFileHandle.Read seek failed: %v", err)
            return err
        }
        r, err = r.Open()
        if err != nil {
            fs.Debugf(fh.remote, "ReadFileHandle.Read seek open failed: %v", err)
            return err
        }
    }
    fh.r.UpdateReader(r)
    fh.offset = offset
    return nil
}

// Seek the file - returns ESPIPE if seeking isn't possible
func (fh *ReadFileHandle) Seek(offset int64, whence int) (n int64, err error) {
    fh.mu.Lock()
    defer fh.mu.Unlock()
    if fh.noSeek {
        return 0, ESPIPE
    }
    size := fh.size
    switch whence {
    case io.SeekStart:
        fh.roffset = 0
    case io.SeekEnd:
        fh.roffset = size
    }
    fh.roffset += offset
    // we don't check the offset here - the next Read will do that
    return fh.roffset, nil
}
// ReadAt reads len(p) bytes into p starting at offset off in the
// underlying input source. It returns the number of bytes read (0 <=
// n <= len(p)) and any error encountered.
//
// When ReadAt returns n < len(p), it returns a non-nil error
// explaining why more bytes were not returned. In this respect,
// ReadAt is stricter than Read.
//
// Even if ReadAt returns n < len(p), it may use all of p as scratch
// space during the call. If some data is available but not len(p)
// bytes, ReadAt blocks until either all the data is available or an
// error occurs. In this respect ReadAt is different from Read.
//
// If the n = len(p) bytes returned by ReadAt are at the end of the
// input source, ReadAt may return either err == EOF or err == nil.
//
// If ReadAt is reading from an input source with a seek offset,
// ReadAt should not affect nor be affected by the underlying seek
// offset.
//
// Clients of ReadAt can execute parallel ReadAt calls on the same
// input source.
//
// Implementations must not retain p.
func (fh *ReadFileHandle) ReadAt(p []byte, off int64) (n int, err error) {
    fh.mu.Lock()
    defer fh.mu.Unlock()
    return fh.readAt(p, off)
}
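// readRangesConcurrently is an illustrative sketch (a hypothetical helper,
// not part of the upstream file) of the "parallel ReadAt" part of the
// contract above: fh.mu serialises the calls, and readAt's bounded
// in-sequence wait (5ms) keeps out-of-order offsets from stalling each other,
// although non-sequential patterns may still force seeks on the underlying
// reader. Short reads at the end of the file come back with io.EOF.
func readRangesConcurrently(fh *ReadFileHandle, offsets []int64, size int) [][]byte {
    bufs := make([][]byte, len(offsets))
    var wg sync.WaitGroup
    for i, off := range offsets {
        wg.Add(1)
        go func(i int, off int64) {
            defer wg.Done()
            buf := make([]byte, size)
            n, err := fh.ReadAt(buf, off)
            if err != nil && err != io.EOF {
                fs.Errorf(fh.remote, "sketch: concurrent ReadAt failed: %v", err)
            }
            bufs[i] = buf[:n]
        }(i, off)
    }
    wg.Wait()
    return bufs
}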
// Implementation of ReadAt - call with lock held
func (fh *ReadFileHandle) readAt(p []byte, off int64) (n int, err error) {
    // defer log.Trace(fh.remote, "p[%d], off=%d", len(p), off)("n=%d, err=%v", &n, &err)
    err = fh.openPending() // FIXME pending open could be more efficient in the presence of seek (and retries)
    if err != nil {
        return 0, err
    }
    // fs.Debugf(fh.remote, "ReadFileHandle.Read size %d offset %d", reqSize, off)
    if fh.closed {
        fs.Errorf(fh.remote, "ReadFileHandle.Read error: %v", EBADF)
        return 0, ECLOSED
    }
    maxBuf := 1024 * 1024
    if len(p) < maxBuf {
        maxBuf = len(p)
    }
    if gap := off - fh.offset; gap > 0 && gap < int64(8*maxBuf) {
        // Set a background timer so we don't wait for long
        // Waits here potentially affect all seeks so they need to be kept short
        // This duration was chosen by finding the smallest value that didn't
        // cause seeks when mounting a local backend.
        const maxWait = 5 * time.Millisecond
        timeout := time.NewTimer(maxWait)
        done := make(chan struct{})
        abort := int32(0)
        go func() {
            select {
            case <-timeout.C:
                // set abort flag and give all the waiting goroutines a kick on timeout
                atomic.StoreInt32(&abort, 1)
                fs.Debugf(fh.remote, "aborting in-sequence read wait, off=%d", off)
                fh.cond.Broadcast()
            case <-done:
            }
        }()
        for fh.offset != off && atomic.LoadInt32(&abort) == 0 {
            fs.Debugf(fh.remote, "waiting for in-sequence read to %d for %v", off, maxWait)
            fh.cond.Wait()
        }
        // tidy up end timer
        close(done)
        timeout.Stop()
        if fh.offset != off {
            fs.Debugf(fh.remote, "failed to wait for in-sequence read to %d", off)
        }
    }
    doSeek := off != fh.offset
    if doSeek && fh.noSeek {
        return 0, ESPIPE
    }
    var newOffset int64
    retries := 0
    reqSize := len(p)
    doReopen := false
    for {
        if doSeek {
            // Are we attempting to seek beyond the end of the
            // file - if so just return EOF leaving the underlying
            // file in an unchanged state.
            if off >= fh.size {
                fs.Debugf(fh.remote, "ReadFileHandle.Read attempt to read beyond end of file: %d > %d", off, fh.size)
                return 0, io.EOF
            }
            // Otherwise do the seek
            err = fh.seek(off, doReopen)
        } else {
            err = nil
        }
        if err == nil {
            if reqSize > 0 {
                fh.readCalled = true
            }
            n, err = io.ReadFull(fh.r, p)
            newOffset = fh.offset + int64(n)
            // if err == nil && rand.Intn(10) == 0 {
            //     err = errors.New("random error")
            // }
            if err == nil {
                break
            } else if (err == io.ErrUnexpectedEOF || err == io.EOF) && (newOffset == fh.size || fh.sizeUnknown) {
                if fh.sizeUnknown {
                    // size is now known since we have read to the end
                    fh.sizeUnknown = false
                    fh.size = newOffset
                }
                // Have read to end of file - reset error
                err = nil
                break
            }
        }
        if retries >= fs.Config.LowLevelRetries {
            break
        }
        retries++
        fs.Errorf(fh.remote, "ReadFileHandle.Read error: low level retry %d/%d: %v", retries, fs.Config.LowLevelRetries, err)
        doSeek = true
        doReopen = true
    }
    if err != nil {
        fs.Errorf(fh.remote, "ReadFileHandle.Read error: %v", err)
    } else {
        fh.offset = newOffset
        // fs.Debugf(fh.remote, "ReadFileHandle.Read OK")

        if fh.hash != nil {
            _, err = fh.hash.Write(p[:n])
            if err != nil {
                fs.Errorf(fh.remote, "ReadFileHandle.Read HashError: %v", err)
                return 0, err
            }
        }

        // If we have no error and we didn't fill the buffer, must be EOF
        if n != len(p) {
            err = io.EOF
        }
    }
    fh.cond.Broadcast() // wake everyone up waiting for an in-sequence read
    return n, err
}

func (fh *ReadFileHandle) checkHash() error {
    if fh.hash == nil || !fh.readCalled || fh.offset < fh.size {
        return nil
    }

    o := fh.file.getObject()
    for hashType, dstSum := range fh.hash.Sums() {
        srcSum, err := o.Hash(context.TODO(), hashType)
        if err != nil {
            return err
        }
        if !hash.Equals(dstSum, srcSum) {
            return errors.Errorf("corrupted on transfer: %v hash differ %q vs %q", hashType, dstSum, srcSum)
        }
    }

    return nil
}
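// drainAndVerify is an illustrative sketch (a hypothetical helper, not part
// of the upstream file) of when checkHash above actually fires: the checksum
// is only compared once the whole object has been read (fh.offset has reached
// fh.size), so reading to EOF and then closing the handle is what triggers
// the end-to-end verification inside close().
func drainAndVerify(fh *ReadFileHandle, w io.Writer) error {
    if _, err := io.Copy(w, fh); err != nil {
        _ = fh.Close()
        return err
    }
    // Close checks the accumulated hash against the remote object's hash and
    // returns a "corrupted on transfer" error if they differ.
    return fh.Close()
}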
// Read reads up to len(p) bytes into p. It returns the number of bytes read (0
// <= n <= len(p)) and any error encountered. Even if Read returns n < len(p),
// it may use all of p as scratch space during the call. If some data is
// available but not len(p) bytes, Read conventionally returns what is
// available instead of waiting for more.
//
// When Read encounters an error or end-of-file condition after successfully
// reading n > 0 bytes, it returns the number of bytes read. It may return the
// (non-nil) error from the same call or return the error (and n == 0) from a
// subsequent call. An instance of this general case is that a Reader returning
// a non-zero number of bytes at the end of the input stream may return either
// err == EOF or err == nil. The next Read should return 0, EOF.
//
// Callers should always process the n > 0 bytes returned before considering
// the error err. Doing so correctly handles I/O errors that happen after
// reading some bytes and also both of the allowed EOF behaviors.
//
// Implementations of Read are discouraged from returning a zero byte count
// with a nil error, except when len(p) == 0. Callers should treat a return of
// 0 and nil as indicating that nothing happened; in particular it does not
// indicate EOF.
//
// Implementations must not retain p.
func (fh *ReadFileHandle) Read(p []byte) (n int, err error) {
    fh.mu.Lock()
    defer fh.mu.Unlock()
    if fh.roffset >= fh.size && !fh.sizeUnknown {
        return 0, io.EOF
    }
    n, err = fh.readAt(p, fh.roffset)
    fh.roffset += int64(n)
    return n, err
}

// close the file handle returning ECLOSED if it has been
// closed already.
//
// Must be called with fh.mu held
func (fh *ReadFileHandle) close() error {
    if fh.closed {
        return ECLOSED
    }
    fh.closed = true

    if fh.opened {
        var err error
        defer func() {
            fh.done(err)
        }()
        // Close first so that we have hashes
        err = fh.r.Close()
        if err != nil {
            return err
        }
        // Now check the hash
        err = fh.checkHash()
        if err != nil {
            return err
        }
    }
    return nil
}

// Close closes the file
func (fh *ReadFileHandle) Close() error {
    fh.mu.Lock()
    defer fh.mu.Unlock()
    return fh.close()
}

// Flush is called each time the file or directory is closed.
// Because there can be multiple file descriptors referring to a
// single opened file, Flush can be called multiple times.
func (fh *ReadFileHandle) Flush() error {
    fh.mu.Lock()
    defer fh.mu.Unlock()
    if !fh.opened {
        return nil
    }
    // fs.Debugf(fh.remote, "ReadFileHandle.Flush")

    if err := fh.checkHash(); err != nil {
        fs.Errorf(fh.remote, "ReadFileHandle.Flush error: %v", err)
        return err
    }

    // fs.Debugf(fh.remote, "ReadFileHandle.Flush OK")
    return nil
}

// Release is called when we are finished with the file handle
//
// It isn't called directly from userspace so the error is ignored by
// the kernel
func (fh *ReadFileHandle) Release() error {
    fh.mu.Lock()
    defer fh.mu.Unlock()
    if !fh.opened {
        return nil
    }
    if fh.closed {
        fs.Debugf(fh.remote, "ReadFileHandle.Release nothing to do")
        return nil
    }
    fs.Debugf(fh.remote, "ReadFileHandle.Release closing")
    err := fh.close()
    if err != nil {
        fs.Errorf(fh.remote, "ReadFileHandle.Release error: %v", err)
    } else {
        // fs.Debugf(fh.remote, "ReadFileHandle.Release OK")
    }
    return err
}

// Size returns the size of the underlying file
func (fh *ReadFileHandle) Size() int64 {
    fh.mu.Lock()
    defer fh.mu.Unlock()
    return fh.size
}

// Stat returns info about the file
func (fh *ReadFileHandle) Stat() (os.FileInfo, error) {
    fh.mu.Lock()
    defer fh.mu.Unlock()
    return fh.file, nil
}
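// tailBytes is an illustrative sketch (a hypothetical helper, not part of the
// upstream file) combining Size and ReadAt from above to fetch the final n
// bytes of the file. It assumes the handle allows seeking (fh.noSeek unset)
// and that the object's size was known when the handle was created; for
// unknown-length objects Size is only accurate once the stream has been read
// to the end.
func tailBytes(fh *ReadFileHandle, n int64) ([]byte, error) {
    size := fh.Size()
    if n > size {
        n = size
    }
    buf := make([]byte, n)
    read, err := fh.ReadAt(buf, size-n)
    if err == io.EOF {
        // A short read at the very end of the file is expected here.
        err = nil
    }
    return buf[:read], err
}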