github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/filter.go (about) 1 package parquet 2 3 // FilterRowReader constructs a RowReader which exposes rows from reader for 4 // which the predicate has returned true. 5 func FilterRowReader(reader RowReader, predicate func(Row) bool) RowReader { 6 f := &filterRowReader{reader: reader, predicate: predicate} 7 for i := range f.rows { 8 f.rows[i] = f.values[i : i : i+1] 9 } 10 return f 11 } 12 13 type filterRowReader struct { 14 reader RowReader 15 predicate func(Row) bool 16 rows [defaultRowBufferSize]Row 17 values [defaultRowBufferSize]Value 18 } 19 20 func (f *filterRowReader) ReadRows(rows []Row) (n int, err error) { 21 for n < len(rows) { 22 r := len(rows) - n 23 24 if r > len(f.rows) { 25 r = len(f.rows) 26 } 27 28 r, err = f.reader.ReadRows(f.rows[:r]) 29 30 for i := 0; i < r; i++ { 31 if f.predicate(f.rows[i]) { 32 rows[n] = append(rows[n][:0], f.rows[i]...) 33 n++ 34 } 35 } 36 37 if err != nil { 38 break 39 } 40 } 41 return n, err 42 } 43 44 // FilterRowWriter constructs a RowWriter which writes rows to writer for which 45 // the predicate has returned true. 46 func FilterRowWriter(writer RowWriter, predicate func(Row) bool) RowWriter { 47 return &filterRowWriter{writer: writer, predicate: predicate} 48 } 49 50 type filterRowWriter struct { 51 writer RowWriter 52 predicate func(Row) bool 53 rows [defaultRowBufferSize]Row 54 } 55 56 func (f *filterRowWriter) WriteRows(rows []Row) (n int, err error) { 57 defer func() { 58 clear := f.rows[:] 59 for i := range clear { 60 clearValues(clear[i]) 61 } 62 }() 63 64 for n < len(rows) { 65 i := 0 66 j := len(rows) - n 67 68 if j > len(f.rows) { 69 j = len(f.rows) 70 } 71 72 for _, row := range rows[n : n+j] { 73 if f.predicate(row) { 74 f.rows[i] = row 75 i++ 76 } 77 } 78 79 if i > 0 { 80 _, err := f.writer.WriteRows(f.rows[:i]) 81 if err != nil { 82 break 83 } 84 } 85 86 n += j 87 } 88 89 return n, err 90 }