github.com/apache/arrow/go/v14@v14.0.2/parquet/internal/encoding/boolean_encoder.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package encoding 18 19 import ( 20 "github.com/apache/arrow/go/v14/arrow/bitutil" 21 "github.com/apache/arrow/go/v14/parquet" 22 "github.com/apache/arrow/go/v14/parquet/internal/utils" 23 ) 24 25 const ( 26 boolBufSize = 1024 27 boolsInBuf = boolBufSize * 8 28 ) 29 30 // PlainBooleanEncoder encodes bools as a bitmap as per the Plain Encoding 31 type PlainBooleanEncoder struct { 32 encoder 33 bitsBuffer []byte 34 wr utils.BitmapWriter 35 } 36 37 // Type for the PlainBooleanEncoder is parquet.Types.Boolean 38 func (PlainBooleanEncoder) Type() parquet.Type { 39 return parquet.Types.Boolean 40 } 41 42 // Put encodes the contents of in into the underlying data buffer. 43 func (enc *PlainBooleanEncoder) Put(in []bool) { 44 if enc.bitsBuffer == nil { 45 enc.bitsBuffer = make([]byte, boolBufSize) 46 } 47 if enc.wr == nil { 48 enc.wr = utils.NewBitmapWriter(enc.bitsBuffer, 0, boolsInBuf) 49 } 50 if len(in) == 0 { 51 return 52 } 53 54 n := enc.wr.AppendBools(in) 55 for n < len(in) { 56 enc.wr.Finish() 57 enc.append(enc.bitsBuffer) 58 enc.wr.Reset(0, boolsInBuf) 59 in = in[n:] 60 n = enc.wr.AppendBools(in) 61 } 62 } 63 64 // PutSpaced will use the validBits bitmap to determine which values are nulls 65 // and can be left out from the slice, and the encoded without those nulls. 66 func (enc *PlainBooleanEncoder) PutSpaced(in []bool, validBits []byte, validBitsOffset int64) { 67 bufferOut := make([]bool, len(in)) 68 nvalid := spacedCompress(in, bufferOut, validBits, validBitsOffset) 69 enc.Put(bufferOut[:nvalid]) 70 } 71 72 // EstimatedDataEncodedSize returns the current number of bytes that have 73 // been buffered so far 74 func (enc *PlainBooleanEncoder) EstimatedDataEncodedSize() int64 { 75 return int64(enc.sink.Len() + int(bitutil.BytesForBits(int64(enc.wr.Pos())))) 76 } 77 78 // FlushValues returns the buffered data, the responsibility is on the caller 79 // to release the buffer memory 80 func (enc *PlainBooleanEncoder) FlushValues() (Buffer, error) { 81 if enc.wr.Pos() > 0 { 82 toFlush := int(enc.wr.Pos()) 83 enc.append(enc.bitsBuffer[:bitutil.BytesForBits(int64(toFlush))]) 84 } 85 86 enc.wr.Reset(0, boolsInBuf) 87 88 return enc.sink.Finish(), nil 89 }