github.com/fraugster/parquet-go@v0.12.0/type_boolean.go (about)

     1  package goparquet
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  
     7  	"github.com/fraugster/parquet-go/parquet"
     8  )
     9  
// booleanPlainDecoder reads boolean values from a byte stream, one byte
// (eight values) at a time.
type booleanPlainDecoder struct {
	// r is the source the packed bytes are read from.
	r    io.Reader
	// left holds values unpacked from the most recently read byte that did
	// not fit into the caller's destination slice; they are handed out first
	// on the next decodeValues call.
	left []bool
}
    14  
    15  // copy the left overs from the previous call. instead of returning an empty subset of the old slice,
    16  // it delete the slice (by returning nil) so there is no memory leak because of the underlying array
    17  // the return value is the new left over and the number of read message
    18  func copyLeftOvers(dst []interface{}, src []bool) ([]bool, int) {
    19  	size := len(dst)
    20  	var clean bool
    21  	if len(src) <= size {
    22  		size = len(src)
    23  		clean = true
    24  	}
    25  
    26  	for i := 0; i < size; i++ {
    27  		dst[i] = src[i]
    28  	}
    29  	if clean {
    30  		return nil, size
    31  	}
    32  
    33  	return src[size:], size
    34  }
    35  
    36  func (b *booleanPlainDecoder) init(r io.Reader) error {
    37  	b.r = r
    38  	b.left = nil
    39  
    40  	return nil
    41  }
    42  
    43  func (b *booleanPlainDecoder) decodeValues(dst []interface{}) (int, error) {
    44  	var start int
    45  	if len(b.left) > 0 {
    46  		// there is a leftover from the last run
    47  		b.left, start = copyLeftOvers(dst, b.left)
    48  		if b.left != nil {
    49  			return len(dst), nil
    50  		}
    51  	}
    52  
    53  	buf := make([]byte, 1)
    54  	for i := start; i < len(dst); i += 8 {
    55  		if _, err := io.ReadFull(b.r, buf); err != nil {
    56  			return i, err
    57  		}
    58  		d := unpack8int32_1(buf)
    59  		for j := 0; j < 8; j++ {
    60  			if i+j < len(dst) {
    61  				dst[i+j] = d[j] == 1
    62  			} else {
    63  				b.left = append(b.left, d[j] == 1)
    64  			}
    65  		}
    66  	}
    67  
    68  	return len(dst), nil
    69  }
    70  
// booleanPlainEncoder buffers boolean values in a packedArray and writes the
// packed bytes out when Close is called.
type booleanPlainEncoder struct {
	// w receives the packed data on Close.
	w    io.Writer
	// data accumulates the values appended by encodeValues.
	data *packedArray
}
    75  
    76  func (b *booleanPlainEncoder) Close() error {
    77  	b.data.flush()
    78  	return writeFull(b.w, b.data.data)
    79  }
    80  
    81  func (b *booleanPlainEncoder) init(w io.Writer) error {
    82  	b.w = w
    83  	b.data = &packedArray{}
    84  	b.data.reset(1)
    85  	return nil
    86  }
    87  
    88  func (b *booleanPlainEncoder) encodeValues(values []interface{}) error {
    89  	for i := range values {
    90  		var v int32
    91  		if values[i].(bool) {
    92  			v = 1
    93  		}
    94  		b.data.appendSingle(v)
    95  	}
    96  
    97  	return nil
    98  }
    99  
// booleanRLEDecoder decodes boolean values by delegating to a hybridDecoder.
type booleanRLEDecoder struct {
	// decoder is created in init and yields one value per next() call.
	decoder *hybridDecoder
}
   103  
// init creates a new hybrid decoder (constructed with an argument of 1,
// presumably the bit width — confirm against newHybridDecoder) and
// initializes it from r via initSize, returning that call's error.
func (b *booleanRLEDecoder) init(r io.Reader) error {
	b.decoder = newHybridDecoder(1)
	return b.decoder.initSize(r)
}
   108  
   109  func (b *booleanRLEDecoder) decodeValues(dst []interface{}) (int, error) {
   110  	total := len(dst)
   111  	for i := 0; i < total; i++ {
   112  		n, err := b.decoder.next()
   113  		if err != nil {
   114  			return i, err
   115  		}
   116  		dst[i] = n == 1
   117  	}
   118  
   119  	return total, nil
   120  }
   121  
// booleanRLEEncoder encodes boolean values by delegating to a hybridEncoder.
type booleanRLEEncoder struct {
	// encoder is created in init and receives the values as int32 0/1.
	encoder *hybridEncoder
}
   125  
// Close closes the underlying hybrid encoder and returns its error.
func (b *booleanRLEEncoder) Close() error {
	return b.encoder.Close()
}
   129  
// init creates a new hybrid encoder (constructed with an argument of 1,
// presumably the bit width — confirm against newHybridEncoder) and
// initializes it on w via initSize, returning that call's error.
func (b *booleanRLEEncoder) init(w io.Writer) error {
	b.encoder = newHybridEncoder(1)
	return b.encoder.initSize(w)
}
   134  
   135  func (b *booleanRLEEncoder) encodeValues(values []interface{}) error {
   136  	buf := make([]int32, len(values))
   137  	for i := range values {
   138  		if values[i].(bool) {
   139  			buf[i] = 1
   140  		} else {
   141  			buf[i] = 0
   142  		}
   143  	}
   144  
   145  	return b.encoder.encode(buf)
   146  }
   147  
// booleanStore is the column store for boolean values. It embeds
// *ColumnParameters, so those fields are accessible directly on the store.
type booleanStore struct {
	// repTyp is the repetition type set by reset and reported by
	// repetitionType; getValues consults it to decide whether a []bool is
	// acceptable.
	repTyp parquet.FieldRepetitionType
	*ColumnParameters
}
   152  
   153  func (b *booleanStore) params() *ColumnParameters {
   154  	if b.ColumnParameters == nil {
   155  		panic("ColumnParameters is nil")
   156  	}
   157  	return b.ColumnParameters
   158  }
   159  
// sizeOf reports the size of v in bytes; for booleans it always returns 0
// and ignores v.
func (b *booleanStore) sizeOf(v interface{}) int {
	// This is a deliberate cheat: a boolean occupies one bit, but sizes are
	// expressed in bytes. Reporting zero is meant to make sure a dictionary
	// is never used for this column.
	return 0
}
   165  
// parquetType returns the physical parquet type of this store, which is
// always parquet.Type_BOOLEAN.
func (b *booleanStore) parquetType() parquet.Type {
	return parquet.Type_BOOLEAN
}
   169  
// repetitionType returns the repetition type most recently set by reset.
func (b *booleanStore) repetitionType() parquet.FieldRepetitionType {
	return b.repTyp
}
   173  
// reset records repetitionType as the store's repetition type. No other
// state is changed.
func (b *booleanStore) reset(repetitionType parquet.FieldRepetitionType) {
	b.repTyp = repetitionType
}
   177  
// getStats returns a newly allocated nilStats on every call; the boolean
// store keeps no statistics state of its own.
func (b *booleanStore) getStats() minMaxValues {
	return &nilStats{}
}
   181  
// getPageStats returns a newly allocated nilStats on every call, exactly
// like getStats; no per-page statistics state is kept by the store.
func (b *booleanStore) getPageStats() minMaxValues {
	return &nilStats{}
}
   185  
   186  func (b *booleanStore) getValues(v interface{}) ([]interface{}, error) {
   187  	var vals []interface{}
   188  	switch typed := v.(type) {
   189  	case bool:
   190  		vals = []interface{}{typed}
   191  	case []bool:
   192  		if b.repTyp != parquet.FieldRepetitionType_REPEATED {
   193  			return nil, fmt.Errorf("the value is not repeated but it is an array")
   194  		}
   195  		vals = make([]interface{}, len(typed))
   196  		for j := range typed {
   197  			vals[j] = typed[j]
   198  		}
   199  	default:
   200  		return nil, fmt.Errorf("unsupported type for storing in bool column: %T => %+v", v, v)
   201  	}
   202  
   203  	return vals, nil
   204  }
   205  
   206  func (b *booleanStore) append(arrayIn interface{}, value interface{}) interface{} {
   207  	if arrayIn == nil {
   208  		arrayIn = make([]bool, 0, 1)
   209  	}
   210  	return append(arrayIn.([]bool), value.(bool))
   211  }