github.com/grailbio/bigslice@v0.0.0-20230519005545-30c4c12152ad/sliceio/scanner.go (about) 1 // Copyright 2018 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache 2.0 3 // license that can be found in the LICENSE file. 4 5 package sliceio 6 7 import ( 8 "context" 9 "reflect" 10 11 "github.com/grailbio/base/errors" 12 "github.com/grailbio/bigslice/frame" 13 "github.com/grailbio/bigslice/slicetype" 14 "github.com/grailbio/bigslice/typecheck" 15 ) 16 17 // A Scanner provides a convenient interface for reading records 18 // (e.g. from a Slice or a shard of a Slice). Successive calls to 19 // Scan (or Scanv) returns the next record (batch of records). 20 // Scanning stops when no more data are available or if an error is 21 // encountered. Scan returns true while it's safe to continue 22 // scanning. When scanning is complete, the user should inspect the 23 // scanner's error to see if scanning stopped because of an EOF or 24 // because another error occurred. 25 // 26 // Callers should not mix calls to Scan and Scanv. 27 type Scanner struct { 28 typ slicetype.Type 29 reader ReadCloser 30 31 err error 32 started bool 33 in frame.Frame 34 beg, end int 35 atEOF bool 36 } 37 38 // NewScanner returns a new scanner of records of type typ from reader r. 39 func NewScanner(typ slicetype.Type, r ReadCloser) *Scanner { 40 return &Scanner{ 41 typ: typ, 42 reader: r, 43 } 44 } 45 46 // Scan the next record into the provided columns. Scanning fails if 47 // the columns do not match arity and type with the underlying data 48 // set. Scan returns true while no errors are encountered and there 49 // remains data to be scanned. Once Scan returns false, call Err to 50 // check for errors. 51 func (s *Scanner) Scan(ctx context.Context, out ...interface{}) bool { 52 if s.err != nil { 53 return false 54 } 55 if len(out) != s.typ.NumOut() { 56 s.err = typecheck.Errorf(1, "wrong arity: expected %d columns, got %d", s.typ.NumOut(), len(out)) 57 return false 58 } 59 for i := range out { 60 if got, want := reflect.TypeOf(out[i]), reflect.PtrTo(s.typ.Out(i)); got != want { 61 s.err = typecheck.Errorf(1, "wrong type for argument %d: expected %s, got %s", i, want, got) 62 return false 63 } 64 } 65 if !s.started { 66 s.started = true 67 s.in = frame.Make(s.typ, defaultChunksize, defaultChunksize) 68 s.beg, s.end = 0, 0 69 } 70 // Read the next batch of input. 71 for s.beg == s.end { 72 if s.atEOF { 73 s.err = EOF 74 return false 75 } 76 n, err := s.reader.Read(ctx, s.in) 77 if err != nil && err != EOF { 78 s.err = err 79 return false 80 } 81 s.beg, s.end = 0, n 82 if err == EOF { 83 s.atEOF = true 84 } 85 } 86 // TODO(marius): this can be made faster 87 for i, col := range out { 88 reflect.ValueOf(col).Elem().Set(s.in.Index(i, s.beg)) 89 } 90 s.beg++ 91 return true 92 } 93 94 // Close releases resources used by the scanner. This must be called exactly 95 // once on the scanner returned by NewScanner. 96 func (s *Scanner) Close() error { 97 if err := s.reader.Close(); err != nil { 98 return errors.E("error closing scanner", err) 99 } 100 return nil 101 } 102 103 // Scanv scans a batch of elements into the provided column vectors. 104 // Each column should be a slice of the correct type. Scanv fails 105 // when the type or arity of the column vectors do not match the 106 // underlying dataset. The number of records scanned is returned 107 // together with a boolean indicating whether scanning should 108 // continue, as in Scan. Once Scan returns false, call Err to 109 // check for errors. 110 func (s *Scanner) Scanv(ctx context.Context, out ...interface{}) (int, bool) { 111 // TODO(marius): vectorize this all the way down 112 if s.err != nil { 113 return 0, false 114 } 115 columnvs := make([]reflect.Value, len(out)) 116 for i := range out { 117 columnvs[i] = reflect.ValueOf(out[i]) 118 if columnvs[i].Kind() != reflect.Slice { 119 panic("passed in non-slice column") 120 } 121 } 122 n := columnvs[0].Len() 123 for i := 0; i < n; i++ { 124 args := make([]interface{}, len(out)) 125 for j := range args { 126 args[j] = columnvs[j].Index(i).Addr().Interface() 127 } 128 if !s.Scan(ctx, args...) { 129 return i, false 130 } 131 } 132 return n, true 133 } 134 135 // Err returns any error that occurred while scanning. 136 func (s *Scanner) Err() error { 137 if s.err == EOF { 138 return nil 139 } 140 return s.err 141 }