// Package trailingbuf adapts a forward-only reader into a limited
// random-access reader that can serve reads at or slightly behind the
// stream's current position, buffering a bounded trailing window.
package trailingbuf

import (
	"context"
	"fmt"
	"io"

	"github.com/Schaudge/grailbase/errors"
	"github.com/Schaudge/grailbase/file/internal/s3bufpool"
	"github.com/Schaudge/grailbase/ioctx"
	"github.com/Schaudge/grailbase/morebufio"
	"github.com/Schaudge/grailbase/must"
)

// ErrTooFarBehind is returned if a read goes too far behind the current position.
// It's set as a cause (which callers can unwrap) on some errors returned by ReadAt.
var ErrTooFarBehind = errors.New("trailbuf: read too far behind")

// ReaderAt serves ReadAt requests from a forward-only stream. Reads at or
// ahead of the current position advance the stream; reads behind it are
// served from a bounded in-memory trailing window (see New's trailSize).
type ReaderAt struct {
	// semaphore guards all subsequent fields. It's used to serialize operations.
	// A capacity-1 channel is used (rather than a mutex) so acquisition can be
	// abandoned when the caller's context is canceled.
	semaphore chan struct{}
	// pr is the data source, wrapped so recently-read bytes remain peekable.
	pr morebufio.PeekBackReader
	// off is the number of bytes we've read from r.
	off int64
	// eof is true after r returns io.EOF.
	eof bool
}

// New creates a ReaderAt that can respond to arbitrary reads as long as they're close
// to the current position. trailSize controls the max distance (controlling buffer space usage).
// Reads too far behind the current position return an error with cause ErrTooFarBehind.
// off is the current position of r (for example, zero for the start of a file, or non-zero for
// reading somewhere in the middle).
// Note: Alternatively, callers could manipulate the offsets in their ReadAt calls to be relative
// to r's initial position. However, since we put offsets in our error message strings, users may
// find debugging easier if they don't need to de-relativize the errors.
func New(r ioctx.Reader, off int64, trailSize int) *ReaderAt {
	return &ReaderAt{
		semaphore: make(chan struct{}, 1),
		pr:        morebufio.NewPeekBackReader(r, trailSize),
		off:       off,
	}
}

// ReadAt implements io.ReaderAt.
47 func (r *ReaderAt) ReadAt(ctx context.Context, dst []byte, off int64) (int, error) { 48 if len(dst) == 0 { 49 return 0, nil 50 } 51 if off < 0 { 52 return 0, errors.E(errors.Invalid, "trailbuf: negative offset") 53 } 54 55 select { 56 case r.semaphore <- struct{}{}: 57 defer func() { <-r.semaphore }() 58 case <-ctx.Done(): 59 return 0, ctx.Err() 60 } 61 62 var nDst int 63 // Try to peek backwards from r.off, if requested. 64 if back := r.off - off; back > 0 { 65 peekBack := r.pr.PeekBack() 66 if back > int64(len(peekBack)) { 67 return nDst, errors.E(errors.Invalid, ErrTooFarBehind, 68 fmt.Sprintf("trailbuf: read would seek backwards: request %d(%d), current pos %d(-%d)", 69 off, len(dst), r.off, len(peekBack))) 70 } 71 peekUsed := copy(dst, peekBack[len(peekBack)-int(back):]) 72 dst = dst[peekUsed:] 73 nDst += int(peekUsed) 74 off += int64(peekUsed) 75 } 76 // If we're already at EOF (so there's not enough data to reach off), or len(dst) 77 // is small enough (off + len(dst) < r.off), we exit early. 78 // Otherwise, we've advanced the request offset up to the current cursor and need to 79 // read more of the underlying stream. 80 if r.eof { 81 return nDst, io.EOF 82 } 83 if len(dst) == 0 { 84 return nDst, nil 85 } 86 must.Truef(off >= r.off, "%d, %d", off, r.off) 87 88 // Skip forward in r.pr, if necessary. 89 if skip := off - r.off; skip > 0 { 90 // Copying to io.Discard ends up using small chunks from an internal pool. This is a fairly 91 // pessimal S3 read size, so since we sometimes read from S3 streams here, we use larger 92 // buffers. 93 // 94 // Note that we may eventually want to use some internal read buffer for all S3 reads, so 95 // clients don't accidentally experience bad performance because their application happens 96 // to use a pattern of small reads. In that case, this special skip buffer would just add 97 // copies, and not help, and we may want to remove it. 
98 discardBuf := s3bufpool.Get() 99 n, err := io.CopyBuffer( 100 // Hide io.Discard's io.ReadFrom implementation because CopyBuffer detects that and 101 // ignores our buffer. 102 struct{ io.Writer }{io.Discard}, 103 io.LimitReader(ioctx.ToStdReader(ctx, r.pr), skip), 104 *discardBuf) 105 s3bufpool.Put(discardBuf) 106 r.off += n 107 if n < skip { 108 r.eof = true 109 err = io.EOF 110 } 111 if err != nil { 112 return nDst, err 113 } 114 } 115 116 // Complete the read. 117 n, err := io.ReadFull(ioctx.ToStdReader(ctx, r.pr), dst) 118 r.off += int64(n) 119 nDst += n 120 if err == io.EOF || err == io.ErrUnexpectedEOF { 121 err = io.EOF 122 r.eof = true 123 } 124 return nDst, err 125 } 126 127 // Size returns the final number of bytes obtained from the underlying stream, if we've already 128 // found EOF, else _, false. 129 func (r *ReaderAt) Size(ctx context.Context) (size int64, known bool, err error) { 130 select { 131 case r.semaphore <- struct{}{}: 132 defer func() { <-r.semaphore }() 133 case <-ctx.Done(): 134 return 0, false, ctx.Err() 135 } 136 137 if r.eof { 138 return r.off, true, nil 139 } 140 return 0, false, nil 141 }