github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/filter.go (about)

     1  package parquet
     2  
     3  // FilterRowReader constructs a RowReader which exposes rows from reader for
     4  // which the predicate has returned true.
     5  func FilterRowReader(reader RowReader, predicate func(Row) bool) RowReader {
     6  	f := &filterRowReader{reader: reader, predicate: predicate}
     7  	for i := range f.rows {
     8  		f.rows[i] = f.values[i : i : i+1]
     9  	}
    10  	return f
    11  }
    12  
    13  type filterRowReader struct {
    14  	reader    RowReader
    15  	predicate func(Row) bool
    16  	rows      [defaultRowBufferSize]Row
    17  	values    [defaultRowBufferSize]Value
    18  }
    19  
    20  func (f *filterRowReader) ReadRows(rows []Row) (n int, err error) {
    21  	for n < len(rows) {
    22  		r := len(rows) - n
    23  
    24  		if r > len(f.rows) {
    25  			r = len(f.rows)
    26  		}
    27  
    28  		r, err = f.reader.ReadRows(f.rows[:r])
    29  
    30  		for i := 0; i < r; i++ {
    31  			if f.predicate(f.rows[i]) {
    32  				rows[n] = append(rows[n][:0], f.rows[i]...)
    33  				n++
    34  			}
    35  		}
    36  
    37  		if err != nil {
    38  			break
    39  		}
    40  	}
    41  	return n, err
    42  }
    43  
    44  // FilterRowWriter constructs a RowWriter which writes rows to writer for which
    45  // the predicate has returned true.
    46  func FilterRowWriter(writer RowWriter, predicate func(Row) bool) RowWriter {
    47  	return &filterRowWriter{writer: writer, predicate: predicate}
    48  }
    49  
    50  type filterRowWriter struct {
    51  	writer    RowWriter
    52  	predicate func(Row) bool
    53  	rows      [defaultRowBufferSize]Row
    54  }
    55  
    56  func (f *filterRowWriter) WriteRows(rows []Row) (n int, err error) {
    57  	defer func() {
    58  		clear := f.rows[:]
    59  		for i := range clear {
    60  			clearValues(clear[i])
    61  		}
    62  	}()
    63  
    64  	for n < len(rows) {
    65  		i := 0
    66  		j := len(rows) - n
    67  
    68  		if j > len(f.rows) {
    69  			j = len(f.rows)
    70  		}
    71  
    72  		for _, row := range rows[n : n+j] {
    73  			if f.predicate(row) {
    74  				f.rows[i] = row
    75  				i++
    76  			}
    77  		}
    78  
    79  		if i > 0 {
    80  			_, err := f.writer.WriteRows(f.rows[:i])
    81  			if err != nil {
    82  				break
    83  			}
    84  		}
    85  
    86  		n += j
    87  	}
    88  
    89  	return n, err
    90  }