github.com/grailbio/bigslice@v0.0.0-20230519005545-30c4c12152ad/exec/buffer.go (about) 1 // Copyright 2018 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache 2.0 3 // license that can be found in the LICENSE file. 4 5 package exec 6 7 import ( 8 "context" 9 10 "github.com/grailbio/bigslice/frame" 11 "github.com/grailbio/bigslice/sliceio" 12 ) 13 14 // TaskBuffer is an in-memory buffer of task output. It has the 15 // ability to handle multiple partitions, and stores vectors of 16 // records for efficiency. 17 // 18 // TaskBuffer layout is: partition, slices, frames. 19 type taskBuffer [][]frame.Frame 20 21 // Slice returns column vectors for the provided partition and global 22 // offset. The returned offset indicates the position of the global 23 // offset into the returned vectors. A returned offset of -1 24 // indicates EOF. Slice is designed to perform zero-copy reads 25 // from a taskBuffer. 26 // 27 // TODO(marius): Slicing is currently inefficient as it requires a 28 // linear walk through the stored vectors. We should aggregate 29 // lengths so that we can perform a binary search. Alternatively, we 30 // can return a cookie from Slice that enables efficient resumption. 31 func (b taskBuffer) Slice(partition, off int) (frame.Frame, int) { 32 beg, end := partition, partition+1 33 // Find the offset. 34 var n int 35 for i := beg; i < end; i++ { 36 for _, f := range b[i] { 37 l := f.Len() 38 if n+l > off { 39 return f, off - n 40 } 41 n += l 42 } 43 } 44 return frame.Frame{}, -1 45 } 46 47 type taskBufferReader struct { 48 q taskBuffer 49 i, j, k int 50 closed bool 51 } 52 53 func (r *taskBufferReader) Read(ctx context.Context, out frame.Frame) (int, error) { 54 if r.closed { 55 panic("closed") 56 } 57 loop: 58 for { 59 switch { 60 case len(r.q) == r.i: 61 return 0, sliceio.EOF 62 case len(r.q[r.i]) == r.j: 63 r.i++ 64 r.j, r.k = 0, 0 65 case r.q[r.i][r.j].Len() == r.k: 66 r.j++ 67 r.k = 0 68 default: 69 break loop 70 } 71 } 72 buf := r.q[r.i][r.j] 73 n := out.Len() 74 if m := buf.Len() - r.k; m < n { 75 n = m 76 } 77 l := r.k + n 78 frame.Copy(out, r.q[r.i][r.j].Slice(r.k, l)) 79 r.k = l 80 return n, nil 81 } 82 83 func (r *taskBufferReader) Close() error { 84 r.q = nil 85 r.closed = true 86 return nil 87 } 88 89 // Reader returns a Reader for a partition of the taskBuffer. 90 func (b taskBuffer) Reader(partition int) sliceio.ReadCloser { 91 if len(b) == 0 { 92 return sliceio.NopCloser(sliceio.EmptyReader{}) 93 } 94 return &taskBufferReader{q: b[partition : partition+1]} 95 }