github.com/fraugster/parquet-go@v0.12.0/type_boolean.go (about) 1 package goparquet 2 3 import ( 4 "fmt" 5 "io" 6 7 "github.com/fraugster/parquet-go/parquet" 8 ) 9 10 type booleanPlainDecoder struct { 11 r io.Reader 12 left []bool 13 } 14 15 // copy the left overs from the previous call. instead of returning an empty subset of the old slice, 16 // it delete the slice (by returning nil) so there is no memory leak because of the underlying array 17 // the return value is the new left over and the number of read message 18 func copyLeftOvers(dst []interface{}, src []bool) ([]bool, int) { 19 size := len(dst) 20 var clean bool 21 if len(src) <= size { 22 size = len(src) 23 clean = true 24 } 25 26 for i := 0; i < size; i++ { 27 dst[i] = src[i] 28 } 29 if clean { 30 return nil, size 31 } 32 33 return src[size:], size 34 } 35 36 func (b *booleanPlainDecoder) init(r io.Reader) error { 37 b.r = r 38 b.left = nil 39 40 return nil 41 } 42 43 func (b *booleanPlainDecoder) decodeValues(dst []interface{}) (int, error) { 44 var start int 45 if len(b.left) > 0 { 46 // there is a leftover from the last run 47 b.left, start = copyLeftOvers(dst, b.left) 48 if b.left != nil { 49 return len(dst), nil 50 } 51 } 52 53 buf := make([]byte, 1) 54 for i := start; i < len(dst); i += 8 { 55 if _, err := io.ReadFull(b.r, buf); err != nil { 56 return i, err 57 } 58 d := unpack8int32_1(buf) 59 for j := 0; j < 8; j++ { 60 if i+j < len(dst) { 61 dst[i+j] = d[j] == 1 62 } else { 63 b.left = append(b.left, d[j] == 1) 64 } 65 } 66 } 67 68 return len(dst), nil 69 } 70 71 type booleanPlainEncoder struct { 72 w io.Writer 73 data *packedArray 74 } 75 76 func (b *booleanPlainEncoder) Close() error { 77 b.data.flush() 78 return writeFull(b.w, b.data.data) 79 } 80 81 func (b *booleanPlainEncoder) init(w io.Writer) error { 82 b.w = w 83 b.data = &packedArray{} 84 b.data.reset(1) 85 return nil 86 } 87 88 func (b *booleanPlainEncoder) encodeValues(values []interface{}) error { 89 for i := range values { 90 var v int32 91 if values[i].(bool) { 92 v = 1 93 } 94 b.data.appendSingle(v) 95 } 96 97 return nil 98 } 99 100 type booleanRLEDecoder struct { 101 decoder *hybridDecoder 102 } 103 104 func (b *booleanRLEDecoder) init(r io.Reader) error { 105 b.decoder = newHybridDecoder(1) 106 return b.decoder.initSize(r) 107 } 108 109 func (b *booleanRLEDecoder) decodeValues(dst []interface{}) (int, error) { 110 total := len(dst) 111 for i := 0; i < total; i++ { 112 n, err := b.decoder.next() 113 if err != nil { 114 return i, err 115 } 116 dst[i] = n == 1 117 } 118 119 return total, nil 120 } 121 122 type booleanRLEEncoder struct { 123 encoder *hybridEncoder 124 } 125 126 func (b *booleanRLEEncoder) Close() error { 127 return b.encoder.Close() 128 } 129 130 func (b *booleanRLEEncoder) init(w io.Writer) error { 131 b.encoder = newHybridEncoder(1) 132 return b.encoder.initSize(w) 133 } 134 135 func (b *booleanRLEEncoder) encodeValues(values []interface{}) error { 136 buf := make([]int32, len(values)) 137 for i := range values { 138 if values[i].(bool) { 139 buf[i] = 1 140 } else { 141 buf[i] = 0 142 } 143 } 144 145 return b.encoder.encode(buf) 146 } 147 148 type booleanStore struct { 149 repTyp parquet.FieldRepetitionType 150 *ColumnParameters 151 } 152 153 func (b *booleanStore) params() *ColumnParameters { 154 if b.ColumnParameters == nil { 155 panic("ColumnParameters is nil") 156 } 157 return b.ColumnParameters 158 } 159 160 func (b *booleanStore) sizeOf(v interface{}) int { 161 // Cheating here. boolean size is one bit, but the size is in byte. so zero to make sure 162 // we never use dictionary on this. 163 return 0 164 } 165 166 func (b *booleanStore) parquetType() parquet.Type { 167 return parquet.Type_BOOLEAN 168 } 169 170 func (b *booleanStore) repetitionType() parquet.FieldRepetitionType { 171 return b.repTyp 172 } 173 174 func (b *booleanStore) reset(repetitionType parquet.FieldRepetitionType) { 175 b.repTyp = repetitionType 176 } 177 178 func (b *booleanStore) getStats() minMaxValues { 179 return &nilStats{} 180 } 181 182 func (b *booleanStore) getPageStats() minMaxValues { 183 return &nilStats{} 184 } 185 186 func (b *booleanStore) getValues(v interface{}) ([]interface{}, error) { 187 var vals []interface{} 188 switch typed := v.(type) { 189 case bool: 190 vals = []interface{}{typed} 191 case []bool: 192 if b.repTyp != parquet.FieldRepetitionType_REPEATED { 193 return nil, fmt.Errorf("the value is not repeated but it is an array") 194 } 195 vals = make([]interface{}, len(typed)) 196 for j := range typed { 197 vals[j] = typed[j] 198 } 199 default: 200 return nil, fmt.Errorf("unsupported type for storing in bool column: %T => %+v", v, v) 201 } 202 203 return vals, nil 204 } 205 206 func (b *booleanStore) append(arrayIn interface{}, value interface{}) interface{} { 207 if arrayIn == nil { 208 arrayIn = make([]bool, 0, 1) 209 } 210 return append(arrayIn.([]bool), value.(bool)) 211 }