github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/fs/asyncreader/asyncreader.go (about) 1 // Package asyncreader provides an asynchronous reader which reads 2 // independently of write 3 package asyncreader 4 5 import ( 6 "context" 7 "errors" 8 "io" 9 "sync" 10 "time" 11 12 "github.com/rclone/rclone/fs" 13 "github.com/rclone/rclone/lib/pool" 14 "github.com/rclone/rclone/lib/readers" 15 ) 16 17 const ( 18 // BufferSize is the default size of the async buffer 19 BufferSize = 1024 * 1024 20 softStartInitial = 4 * 1024 21 bufferCacheSize = 64 // max number of buffers to keep in cache 22 bufferCacheFlushTime = 5 * time.Second // flush the cached buffers after this long 23 ) 24 25 // ErrorStreamAbandoned is returned when the input is closed before the end of the stream 26 var ErrorStreamAbandoned = errors.New("stream abandoned") 27 28 // AsyncReader will do async read-ahead from the input reader 29 // and make the data available as an io.Reader. 30 // This should be fully transparent, except that once an error 31 // has been returned from the Reader, it will not recover. 32 type AsyncReader struct { 33 in io.ReadCloser // Input reader 34 ready chan *buffer // Buffers ready to be handed to the reader 35 token chan struct{} // Tokens which allow a buffer to be taken 36 exit chan struct{} // Closes when finished 37 buffers int // Number of buffers 38 err error // If an error has occurred it is here 39 cur *buffer // Current buffer being served 40 exited chan struct{} // Channel is closed been the async reader shuts down 41 size int // size of buffer to use 42 closed bool // whether we have closed the underlying stream 43 mu sync.Mutex // lock for Read/WriteTo/Abandon/Close 44 ci *fs.ConfigInfo // for reading config 45 } 46 47 // New returns a reader that will asynchronously read from 48 // the supplied Reader into a number of buffers each of size BufferSize 49 // It will start reading from the input at once, maybe even before this 50 // function has returned. 51 // The input can be read from the returned reader. 52 // When done use Close to release the buffers and close the supplied input. 53 func New(ctx context.Context, rd io.ReadCloser, buffers int) (*AsyncReader, error) { 54 if buffers <= 0 { 55 return nil, errors.New("number of buffers too small") 56 } 57 if rd == nil { 58 return nil, errors.New("nil reader supplied") 59 } 60 a := &AsyncReader{ 61 ci: fs.GetConfig(ctx), 62 } 63 a.init(rd, buffers) 64 return a, nil 65 } 66 67 func (a *AsyncReader) init(rd io.ReadCloser, buffers int) { 68 a.in = rd 69 a.ready = make(chan *buffer, buffers) 70 a.token = make(chan struct{}, buffers) 71 a.exit = make(chan struct{}) 72 a.exited = make(chan struct{}) 73 a.buffers = buffers 74 a.cur = nil 75 a.size = softStartInitial 76 77 // Create tokens 78 for i := 0; i < buffers; i++ { 79 a.token <- struct{}{} 80 } 81 82 // Start async reader 83 go func() { 84 // Ensure that when we exit this is signalled. 85 defer close(a.exited) 86 defer close(a.ready) 87 for { 88 select { 89 case <-a.token: 90 b := a.getBuffer() 91 if a.size < BufferSize { 92 b.buf = b.buf[:a.size] 93 a.size <<= 1 94 } 95 err := b.read(a.in) 96 a.ready <- b 97 if err != nil { 98 return 99 } 100 case <-a.exit: 101 return 102 } 103 } 104 }() 105 } 106 107 // bufferPool is a global pool of buffers 108 var bufferPool *pool.Pool 109 var bufferPoolOnce sync.Once 110 111 // return the buffer to the pool (clearing it) 112 func (a *AsyncReader) putBuffer(b *buffer) { 113 bufferPool.Put(b.buf) 114 b.buf = nil 115 } 116 117 // get a buffer from the pool 118 func (a *AsyncReader) getBuffer() *buffer { 119 bufferPoolOnce.Do(func() { 120 // Initialise the buffer pool when used 121 bufferPool = pool.New(bufferCacheFlushTime, BufferSize, bufferCacheSize, a.ci.UseMmap) 122 }) 123 return &buffer{ 124 buf: bufferPool.Get(), 125 } 126 } 127 128 // Read will return the next available data. 129 func (a *AsyncReader) fill() (err error) { 130 if a.cur.isEmpty() { 131 if a.cur != nil { 132 a.putBuffer(a.cur) 133 a.token <- struct{}{} 134 a.cur = nil 135 } 136 b, ok := <-a.ready 137 if !ok { 138 // Return an error to show fill failed 139 if a.err == nil { 140 return ErrorStreamAbandoned 141 } 142 return a.err 143 } 144 a.cur = b 145 } 146 return nil 147 } 148 149 // Read will return the next available data. 150 func (a *AsyncReader) Read(p []byte) (n int, err error) { 151 a.mu.Lock() 152 defer a.mu.Unlock() 153 154 // Swap buffer and maybe return error 155 err = a.fill() 156 if err != nil { 157 return 0, err 158 } 159 160 // Copy what we can 161 n = copy(p, a.cur.buffer()) 162 a.cur.increment(n) 163 164 // If at end of buffer, return any error, if present 165 if a.cur.isEmpty() { 166 a.err = a.cur.err 167 return n, a.err 168 } 169 return n, nil 170 } 171 172 // WriteTo writes data to w until there's no more data to write or when an error occurs. 173 // The return value n is the number of bytes written. 174 // Any error encountered during the write is also returned. 175 func (a *AsyncReader) WriteTo(w io.Writer) (n int64, err error) { 176 a.mu.Lock() 177 defer a.mu.Unlock() 178 179 n = 0 180 for { 181 err = a.fill() 182 if err == io.EOF { 183 return n, nil 184 } 185 if err != nil { 186 return n, err 187 } 188 n2, err := w.Write(a.cur.buffer()) 189 a.cur.increment(n2) 190 n += int64(n2) 191 if err != nil { 192 return n, err 193 } 194 if a.cur.err == io.EOF { 195 a.err = a.cur.err 196 return n, err 197 } 198 if a.cur.err != nil { 199 a.err = a.cur.err 200 return n, a.cur.err 201 } 202 } 203 } 204 205 // SkipBytes will try to seek 'skip' bytes relative to the current position. 206 // On success it returns true. If 'skip' is outside the current buffer data or 207 // an error occurs, Abandon is called and false is returned. 208 func (a *AsyncReader) SkipBytes(skip int) (ok bool) { 209 a.mu.Lock() 210 defer func() { 211 a.mu.Unlock() 212 if !ok { 213 a.Abandon() 214 } 215 }() 216 217 if a.err != nil { 218 return false 219 } 220 if skip < 0 { 221 // seek backwards if skip is inside current buffer 222 if a.cur != nil && a.cur.offset+skip >= 0 { 223 a.cur.offset += skip 224 return true 225 } 226 return false 227 } 228 // early return if skip is past the maximum buffer capacity 229 if skip >= (len(a.ready)+1)*BufferSize { 230 return false 231 } 232 233 refillTokens := 0 234 for { 235 if a.cur.isEmpty() { 236 if a.cur != nil { 237 a.putBuffer(a.cur) 238 refillTokens++ 239 a.cur = nil 240 } 241 select { 242 case b, ok := <-a.ready: 243 if !ok { 244 return false 245 } 246 a.cur = b 247 default: 248 return false 249 } 250 } 251 252 n := len(a.cur.buffer()) 253 if n > skip { 254 n = skip 255 } 256 a.cur.increment(n) 257 skip -= n 258 if skip == 0 { 259 for ; refillTokens > 0; refillTokens-- { 260 a.token <- struct{}{} 261 } 262 // If at end of buffer, store any error, if present 263 if a.cur.isEmpty() && a.cur.err != nil { 264 a.err = a.cur.err 265 } 266 return true 267 } 268 if a.cur.err != nil { 269 a.err = a.cur.err 270 return false 271 } 272 } 273 } 274 275 // StopBuffering will ensure that the underlying async reader is shut 276 // down so no more is read from the input. 277 // 278 // This does not free the memory so Abandon() or Close() need to be 279 // called on the input. 280 // 281 // This does not wait for Read/WriteTo to complete so can be called 282 // concurrently to those. 283 func (a *AsyncReader) StopBuffering() { 284 select { 285 case <-a.exit: 286 // Do nothing if reader routine already exited 287 return 288 default: 289 } 290 // Close and wait for go routine 291 close(a.exit) 292 <-a.exited 293 } 294 295 // Abandon will ensure that the underlying async reader is shut down 296 // and memory is returned. It does everything but close the input. 297 // 298 // It will NOT close the input supplied on New. 299 func (a *AsyncReader) Abandon() { 300 a.StopBuffering() 301 // take the lock to wait for Read/WriteTo to complete 302 a.mu.Lock() 303 defer a.mu.Unlock() 304 // Return any outstanding buffers to the Pool 305 if a.cur != nil { 306 a.putBuffer(a.cur) 307 a.cur = nil 308 } 309 for b := range a.ready { 310 a.putBuffer(b) 311 } 312 } 313 314 // Close will ensure that the underlying async reader is shut down. 315 // It will also close the input supplied on New. 316 func (a *AsyncReader) Close() (err error) { 317 a.Abandon() 318 if a.closed { 319 return nil 320 } 321 a.closed = true 322 return a.in.Close() 323 } 324 325 // Internal buffer 326 // If an error is present, it must be returned 327 // once all buffer content has been served. 328 type buffer struct { 329 buf []byte 330 err error 331 offset int 332 } 333 334 // isEmpty returns true is offset is at end of 335 // buffer, or 336 func (b *buffer) isEmpty() bool { 337 if b == nil { 338 return true 339 } 340 if len(b.buf)-b.offset <= 0 { 341 return true 342 } 343 return false 344 } 345 346 // read into start of the buffer from the supplied reader, 347 // resets the offset and updates the size of the buffer. 348 // Any error encountered during the read is returned. 349 func (b *buffer) read(rd io.Reader) error { 350 var n int 351 n, b.err = readers.ReadFill(rd, b.buf) 352 b.buf = b.buf[0:n] 353 b.offset = 0 354 return b.err 355 } 356 357 // Return the buffer at current offset 358 func (b *buffer) buffer() []byte { 359 return b.buf[b.offset:] 360 } 361 362 // increment the offset 363 func (b *buffer) increment(n int) { 364 b.offset += n 365 }