github.com/grailbio/bigslice@v0.0.0-20230519005545-30c4c12152ad/exec/buffer.go (about)

     1  // Copyright 2018 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache 2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package exec
     6  
     7  import (
     8  	"context"
     9  
    10  	"github.com/grailbio/bigslice/frame"
    11  	"github.com/grailbio/bigslice/sliceio"
    12  )
    13  
// taskBuffer is an in-memory buffer of task output. It has the
// ability to handle multiple partitions, and stores vectors of
// records for efficiency.
//
// The outer slice is indexed by partition; each element is the
// ordered list of frames belonging to that partition.
type taskBuffer [][]frame.Frame
    20  
    21  // Slice returns column vectors for the provided partition and global
    22  // offset. The returned offset indicates the position of the global
    23  // offset into the returned vectors. A returned offset of -1
    24  // indicates EOF. Slice is designed to perform zero-copy reads
    25  // from a taskBuffer.
    26  //
    27  // TODO(marius): Slicing is currently inefficient as it requires a
    28  // linear walk through the stored vectors. We should aggregate
    29  // lengths so that we can perform a binary search. Alternatively, we
    30  // can return a cookie from Slice that enables efficient resumption.
    31  func (b taskBuffer) Slice(partition, off int) (frame.Frame, int) {
    32  	beg, end := partition, partition+1
    33  	// Find the offset.
    34  	var n int
    35  	for i := beg; i < end; i++ {
    36  		for _, f := range b[i] {
    37  			l := f.Len()
    38  			if n+l > off {
    39  				return f, off - n
    40  			}
    41  			n += l
    42  		}
    43  	}
    44  	return frame.Frame{}, -1
    45  }
    46  
// taskBufferReader reads the rows stored in a taskBuffer in order.
// A pointer to it is what taskBuffer.Reader returns as a
// sliceio.ReadCloser.
//
// q is the buffer being read; Close sets it to nil. The cursor
// (i, j, k) identifies the next row to return: i indexes into q,
// j indexes the frame within q[i], and k is the row offset within
// frame q[i][j]. closed is set by Close, after which Read panics.
type taskBufferReader struct {
	q       taskBuffer
	i, j, k int
	closed  bool
}
    52  
    53  func (r *taskBufferReader) Read(ctx context.Context, out frame.Frame) (int, error) {
    54  	if r.closed {
    55  		panic("closed")
    56  	}
    57  loop:
    58  	for {
    59  		switch {
    60  		case len(r.q) == r.i:
    61  			return 0, sliceio.EOF
    62  		case len(r.q[r.i]) == r.j:
    63  			r.i++
    64  			r.j, r.k = 0, 0
    65  		case r.q[r.i][r.j].Len() == r.k:
    66  			r.j++
    67  			r.k = 0
    68  		default:
    69  			break loop
    70  		}
    71  	}
    72  	buf := r.q[r.i][r.j]
    73  	n := out.Len()
    74  	if m := buf.Len() - r.k; m < n {
    75  		n = m
    76  	}
    77  	l := r.k + n
    78  	frame.Copy(out, r.q[r.i][r.j].Slice(r.k, l))
    79  	r.k = l
    80  	return n, nil
    81  }
    82  
    83  func (r *taskBufferReader) Close() error {
    84  	r.q = nil
    85  	r.closed = true
    86  	return nil
    87  }
    88  
    89  // Reader returns a Reader for a partition of the taskBuffer.
    90  func (b taskBuffer) Reader(partition int) sliceio.ReadCloser {
    91  	if len(b) == 0 {
    92  		return sliceio.NopCloser(sliceio.EmptyReader{})
    93  	}
    94  	return &taskBufferReader{q: b[partition : partition+1]}
    95  }