github.com/grailbio/bigslice@v0.0.0-20230519005545-30c4c12152ad/sliceio/reader.go (about)

     1  // Copyright 2018 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache 2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package sliceio provides utilities for managing I/O for Bigslice
     6  // operations.
     7  package sliceio
     8  
     9  import (
    10  	"context"
    11  	"io"
    12  	"reflect"
    13  	"runtime/pprof"
    14  
    15  	"github.com/grailbio/base/errors"
    16  	"github.com/grailbio/bigslice/frame"
    17  	"github.com/grailbio/bigslice/internal/defaultsize"
    18  	"github.com/grailbio/bigslice/slicetype"
    19  )
    20  
// defaultChunksize is the default size used for I/O vectors within the
// sliceio package. It is initialized from defaultsize.Chunk; ReadAll
// passes it to frame.Make when allocating its read buffer.
var defaultChunksize = defaultsize.Chunk
    24  
// EOF is the error returned by Reader.Read when no more data is
// available. EOF is intended as a sentinel error: it signals a
// graceful end of output, and the readers in this package test for it
// by identity (err == EOF). If output terminates unexpectedly, a
// different error should be returned.
var EOF = errors.New("EOF")
    30  
// A Reader represents a stateful stream of records. Each call to
// Read reads the next set of available records.
type Reader interface {
	// Read reads a vector of records from the underlying Slice. Each
	// passed-in column should be a value containing a slice of column
	// values. The number of columns should match the number of columns
	// in the slice; their types should match the corresponding column
	// types of the slice. Each column should have the same slice
	// length.
	//
	// Read returns the number of records read by this call, or an
	// error. When no more records are available, Read returns EOF. Read
	// may return EOF when n > 0. In this case, n records were read, but
	// no more are available; callers must consume those n records
	// before treating the stream as finished.
	//
	// Read should never reuse any allocated memory in the frame;
	// its callers should not mutate the data returned.
	//
	// Read should not be called concurrently.
	Read(ctx context.Context, frame frame.Frame) (int, error)
}
    52  
// ReadCloser groups the Read and Close methods: it is a Reader that
// also implements io.Closer.
type ReadCloser interface {
	Reader
	io.Closer
}
    58  
    59  // nopCloser decorates a reader with a no-op Close method. Use it to adapt a
    60  // Reader to a ReadCloser when the Reader has no resources to release on Close.
    61  type nopCloser struct {
    62  	Reader
    63  }
    64  
    65  func (nopCloser) Close() error {
    66  	return nil
    67  }
    68  
    69  func NopCloser(r Reader) ReadCloser {
    70  	return nopCloser{r}
    71  }
    72  
    73  type multiReader struct {
    74  	q   []ReadCloser
    75  	err error
    76  }
    77  
    78  // MultiReader returns a ReadCloser that's the logical concatenation of the
    79  // provided input readers. Once every underlying ReadCloser has returned EOF,
    80  // Read will return EOF, too. Non-EOF errors are returned immediately.
    81  func MultiReader(readers ...ReadCloser) ReadCloser {
    82  	return &multiReader{q: readers}
    83  }
    84  
    85  func (m *multiReader) Read(ctx context.Context, out frame.Frame) (n int, err error) {
    86  	if m.err != nil {
    87  		return 0, m.err
    88  	}
    89  	for len(m.q) > 0 {
    90  		n, err := m.q[0].Read(ctx, out)
    91  		switch {
    92  		case err == EOF:
    93  			// There's not much for us to do if the Close fails, so we just
    94  			// ignore it.
    95  			_ = m.q[0].Close()
    96  			m.q[0] = nil
    97  			m.q = m.q[1:]
    98  		case err != nil:
    99  			m.err = err
   100  			return n, err
   101  		case n > 0:
   102  			return n, err
   103  		}
   104  	}
   105  	return 0, EOF
   106  }
   107  
   108  func (m *multiReader) Close() error {
   109  	var err error
   110  	for i, r := range m.q {
   111  		if r == nil {
   112  			continue
   113  		}
   114  		cerr := r.Close()
   115  		if err == nil {
   116  			err = cerr
   117  		}
   118  		m.q[i] = nil
   119  	}
   120  	return err
   121  }
   122  
   123  // FrameReader implements a Reader for a single Frame.
   124  type frameReader struct {
   125  	frame.Frame
   126  }
   127  
   128  // FrameReader returns a Reader that reads the provided
   129  // Frame to completion.
   130  func FrameReader(frame frame.Frame) Reader {
   131  	return &frameReader{frame}
   132  }
   133  
   134  func (f *frameReader) Read(ctx context.Context, out frame.Frame) (int, error) {
   135  	n := out.Len()
   136  	max := f.Frame.Len()
   137  	if max < n {
   138  		n = max
   139  	}
   140  	frame.Copy(out, f.Frame)
   141  	f.Frame = f.Frame.Slice(n, max)
   142  	if f.Frame.Len() == 0 {
   143  		return n, EOF
   144  	}
   145  	return n, nil
   146  }
   147  
   148  // ReadAll copies all elements from reader r into the provided column
   149  // pointers. ReadAll is not tuned for performance and is intended for
   150  // testing purposes.
   151  func ReadAll(ctx context.Context, r Reader, columns ...interface{}) error {
   152  	columnsv := make([]reflect.Value, len(columns))
   153  	types := make([]reflect.Type, len(columns))
   154  	for i := range columns {
   155  		columnsv[i] = reflect.ValueOf(columns[i])
   156  		if columnsv[i].Type().Kind() != reflect.Ptr {
   157  			return errors.E(errors.Invalid, "attempted to read into non-pointer")
   158  		}
   159  		types[i] = reflect.TypeOf(columns[i]).Elem().Elem()
   160  	}
   161  	buf := frame.Make(slicetype.New(types...), defaultChunksize, defaultChunksize)
   162  	for {
   163  		n, err := r.Read(ctx, buf)
   164  		if err != nil && err != EOF {
   165  			return err
   166  		}
   167  		buf = buf.Slice(0, n)
   168  		for i := range columnsv {
   169  			columnsv[i].Elem().Set(reflect.AppendSlice(columnsv[i].Elem(), buf.Value(i)))
   170  		}
   171  		if err == EOF {
   172  			break
   173  		}
   174  		buf = buf.Slice(0, buf.Cap())
   175  	}
   176  	return nil
   177  }
   178  
   179  // ReadFull reads the full length of the frame. ReadFull reads short
   180  // frames only on EOF.
   181  func ReadFull(ctx context.Context, r Reader, f frame.Frame) (n int, err error) {
   182  	len := f.Len()
   183  	for n < len {
   184  		m, err := r.Read(ctx, f.Slice(n, len))
   185  		n += m
   186  		if err != nil {
   187  			return n, err
   188  		}
   189  	}
   190  	return n, nil
   191  }
   192  
   193  // An errReader is a reader that only returns errors.
   194  type errReader struct{ Err error }
   195  
   196  // ErrReader returns a reader that returns the provided error
   197  // on every call to read. ErrReader panics if err is nil.
   198  func ErrReader(err error) Reader {
   199  	if err == nil {
   200  		panic("nil error")
   201  	}
   202  	return &errReader{err}
   203  }
   204  
   205  func (e errReader) Read(ctx context.Context, f frame.Frame) (int, error) {
   206  	return 0, e.Err
   207  }
   208  
   209  // ReaderWithCloseFunc is a ReadCloser that wraps an existing Reader and uses a
   210  // provided function for its Close.
   211  type ReaderWithCloseFunc struct {
   212  	Reader
   213  	CloseFunc func() error
   214  }
   215  
   216  // Close implements io.Closer.
   217  func (r ReaderWithCloseFunc) Close() error {
   218  	return r.CloseFunc()
   219  }
   220  
   221  // TODO(jcharumilind): Get rid of ClosingReader, as it makes it too tempting to
   222  // not properly handle errors. We use it in cases where we expect to read from
   223  // many readers (e.g. mergeReader). On failure, we should close all of them, but
   224  // ClosingReader obscures this a bit and makes it so that the only way to close
   225  // is by reading until non-nil error.
   226  
   227  // ClosingReader closes the wrapped ReadCloser when Read returns any error.
   228  type ClosingReader struct {
   229  	r      ReadCloser
   230  	closed bool
   231  }
   232  
   233  // NewClosingReader returns a new ClosingReader for r.
   234  func NewClosingReader(r ReadCloser) *ClosingReader {
   235  	return &ClosingReader{r: r}
   236  }
   237  
   238  // Read implements sliceio.Reader.
   239  func (c *ClosingReader) Read(ctx context.Context, out frame.Frame) (int, error) {
   240  	n, err := c.r.Read(ctx, out)
   241  	if err != nil && !c.closed {
   242  		c.r.Close()
   243  		c.closed = true
   244  	}
   245  	return n, err
   246  }
   247  
// EmptyReader is a Reader that contains no records: every call to Read
// returns EOF.
type EmptyReader struct{}

// Read implements Reader. It reads nothing and always returns 0, EOF.
func (EmptyReader) Read(ctx context.Context, f frame.Frame) (int, error) {
	return 0, EOF
}
   254  
   255  // PprofReader executes Read in a labeled Context.
   256  type PprofReader struct {
   257  	Reader
   258  	Label string
   259  }
   260  
   261  func (r *PprofReader) Read(ctx context.Context, frame frame.Frame) (n int, err error) {
   262  	labels := pprof.Labels("sliceName", r.Label)
   263  	pprof.Do(ctx, labels, func(ctx context.Context) {
   264  		n, err = r.Reader.Read(ctx, frame)
   265  	})
   266  	return
   267  }