github.com/grailbio/bigslice@v0.0.0-20230519005545-30c4c12152ad/sliceio/reader.go (about) 1 // Copyright 2018 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache 2.0 3 // license that can be found in the LICENSE file. 4 5 // Package sliceio provides utilities for managing I/O for Bigslice 6 // operations. 7 package sliceio 8 9 import ( 10 "context" 11 "io" 12 "reflect" 13 "runtime/pprof" 14 15 "github.com/grailbio/base/errors" 16 "github.com/grailbio/bigslice/frame" 17 "github.com/grailbio/bigslice/internal/defaultsize" 18 "github.com/grailbio/bigslice/slicetype" 19 ) 20 21 // DefaultChunksize is the default size used for I/O vectors within the 22 // sliceio package. 23 var defaultChunksize = defaultsize.Chunk 24 25 // EOF is the error returned by Reader.Read when no more data is 26 // available. EOF is intended as a sentinel error: it signals a 27 // graceful end of output. If output terminates unexpectedly, a 28 // different error should be returned. 29 var EOF = errors.New("EOF") 30 31 // A Reader represents a stateful stream of records. Each call to 32 // Read reads the next set of available records. 33 type Reader interface { 34 // Read reads a vector of records from the underlying Slice. Each 35 // passed-in column should be a value containing a slice of column 36 // values. The number of columns should match the number of columns 37 // in the slice; their types should match the corresponding column 38 // types of the slice. Each column should have the same slice 39 // length. 40 // 41 // Read returns the total number of records read, or an error. When 42 // no more records are available, Read returns EOF. Read may return 43 // EOF when n > 0. In this case, n records were read, but no more 44 // are available. 45 // 46 // Read should never reuse any allocated memory in the frame; 47 // its callers should not mutate the data returned. 48 // 49 // Read should not be called concurrently. 50 Read(ctx context.Context, frame frame.Frame) (int, error) 51 } 52 53 // ReadCloser groups the Read and Close methods. 54 type ReadCloser interface { 55 Reader 56 io.Closer 57 } 58 59 // nopCloser decorates a reader with a no-op Close method. Use it to adapt a 60 // Reader to a ReadCloser when the Reader has no resources to release on Close. 61 type nopCloser struct { 62 Reader 63 } 64 65 func (nopCloser) Close() error { 66 return nil 67 } 68 69 func NopCloser(r Reader) ReadCloser { 70 return nopCloser{r} 71 } 72 73 type multiReader struct { 74 q []ReadCloser 75 err error 76 } 77 78 // MultiReader returns a ReadCloser that's the logical concatenation of the 79 // provided input readers. Once every underlying ReadCloser has returned EOF, 80 // Read will return EOF, too. Non-EOF errors are returned immediately. 81 func MultiReader(readers ...ReadCloser) ReadCloser { 82 return &multiReader{q: readers} 83 } 84 85 func (m *multiReader) Read(ctx context.Context, out frame.Frame) (n int, err error) { 86 if m.err != nil { 87 return 0, m.err 88 } 89 for len(m.q) > 0 { 90 n, err := m.q[0].Read(ctx, out) 91 switch { 92 case err == EOF: 93 // There's not much for us to do if the Close fails, so we just 94 // ignore it. 95 _ = m.q[0].Close() 96 m.q[0] = nil 97 m.q = m.q[1:] 98 case err != nil: 99 m.err = err 100 return n, err 101 case n > 0: 102 return n, err 103 } 104 } 105 return 0, EOF 106 } 107 108 func (m *multiReader) Close() error { 109 var err error 110 for i, r := range m.q { 111 if r == nil { 112 continue 113 } 114 cerr := r.Close() 115 if err == nil { 116 err = cerr 117 } 118 m.q[i] = nil 119 } 120 return err 121 } 122 123 // FrameReader implements a Reader for a single Frame. 124 type frameReader struct { 125 frame.Frame 126 } 127 128 // FrameReader returns a Reader that reads the provided 129 // Frame to completion. 130 func FrameReader(frame frame.Frame) Reader { 131 return &frameReader{frame} 132 } 133 134 func (f *frameReader) Read(ctx context.Context, out frame.Frame) (int, error) { 135 n := out.Len() 136 max := f.Frame.Len() 137 if max < n { 138 n = max 139 } 140 frame.Copy(out, f.Frame) 141 f.Frame = f.Frame.Slice(n, max) 142 if f.Frame.Len() == 0 { 143 return n, EOF 144 } 145 return n, nil 146 } 147 148 // ReadAll copies all elements from reader r into the provided column 149 // pointers. ReadAll is not tuned for performance and is intended for 150 // testing purposes. 151 func ReadAll(ctx context.Context, r Reader, columns ...interface{}) error { 152 columnsv := make([]reflect.Value, len(columns)) 153 types := make([]reflect.Type, len(columns)) 154 for i := range columns { 155 columnsv[i] = reflect.ValueOf(columns[i]) 156 if columnsv[i].Type().Kind() != reflect.Ptr { 157 return errors.E(errors.Invalid, "attempted to read into non-pointer") 158 } 159 types[i] = reflect.TypeOf(columns[i]).Elem().Elem() 160 } 161 buf := frame.Make(slicetype.New(types...), defaultChunksize, defaultChunksize) 162 for { 163 n, err := r.Read(ctx, buf) 164 if err != nil && err != EOF { 165 return err 166 } 167 buf = buf.Slice(0, n) 168 for i := range columnsv { 169 columnsv[i].Elem().Set(reflect.AppendSlice(columnsv[i].Elem(), buf.Value(i))) 170 } 171 if err == EOF { 172 break 173 } 174 buf = buf.Slice(0, buf.Cap()) 175 } 176 return nil 177 } 178 179 // ReadFull reads the full length of the frame. ReadFull reads short 180 // frames only on EOF. 181 func ReadFull(ctx context.Context, r Reader, f frame.Frame) (n int, err error) { 182 len := f.Len() 183 for n < len { 184 m, err := r.Read(ctx, f.Slice(n, len)) 185 n += m 186 if err != nil { 187 return n, err 188 } 189 } 190 return n, nil 191 } 192 193 // An errReader is a reader that only returns errors. 194 type errReader struct{ Err error } 195 196 // ErrReader returns a reader that returns the provided error 197 // on every call to read. ErrReader panics if err is nil. 198 func ErrReader(err error) Reader { 199 if err == nil { 200 panic("nil error") 201 } 202 return &errReader{err} 203 } 204 205 func (e errReader) Read(ctx context.Context, f frame.Frame) (int, error) { 206 return 0, e.Err 207 } 208 209 // ReaderWithCloseFunc is a ReadCloser that wraps an existing Reader and uses a 210 // provided function for its Close. 211 type ReaderWithCloseFunc struct { 212 Reader 213 CloseFunc func() error 214 } 215 216 // Close implements io.Closer. 217 func (r ReaderWithCloseFunc) Close() error { 218 return r.CloseFunc() 219 } 220 221 // TODO(jcharumilind): Get rid of ClosingReader, as it makes it too tempting to 222 // not properly handle errors. We use it in cases where we expect to read from 223 // many readers (e.g. mergeReader). On failure, we should close all of them, but 224 // ClosingReader obscures this a bit and makes it so that the only way to close 225 // is by reading until non-nil error. 226 227 // ClosingReader closes the wrapped ReadCloser when Read returns any error. 228 type ClosingReader struct { 229 r ReadCloser 230 closed bool 231 } 232 233 // NewClosingReader returns a new ClosingReader for r. 234 func NewClosingReader(r ReadCloser) *ClosingReader { 235 return &ClosingReader{r: r} 236 } 237 238 // Read implements sliceio.Reader. 239 func (c *ClosingReader) Read(ctx context.Context, out frame.Frame) (int, error) { 240 n, err := c.r.Read(ctx, out) 241 if err != nil && !c.closed { 242 c.r.Close() 243 c.closed = true 244 } 245 return n, err 246 } 247 248 // EmptyReader returns an EOF. 249 type EmptyReader struct{} 250 251 func (EmptyReader) Read(ctx context.Context, f frame.Frame) (int, error) { 252 return 0, EOF 253 } 254 255 // PprofReader executes Read in a labeled Context. 256 type PprofReader struct { 257 Reader 258 Label string 259 } 260 261 func (r *PprofReader) Read(ctx context.Context, frame frame.Frame) (n int, err error) { 262 labels := pprof.Labels("sliceName", r.Label) 263 pprof.Do(ctx, labels, func(ctx context.Context) { 264 n, err = r.Reader.Read(ctx, frame) 265 }) 266 return 267 }