github.com/apache/arrow/go/v14@v14.0.2/parquet/internal/encoding/boolean_encoder.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package encoding
    18  
    19  import (
    20  	"github.com/apache/arrow/go/v14/arrow/bitutil"
    21  	"github.com/apache/arrow/go/v14/parquet"
    22  	"github.com/apache/arrow/go/v14/parquet/internal/utils"
    23  )
    24  
    25  const (
    26  	boolBufSize = 1024
    27  	boolsInBuf  = boolBufSize * 8
    28  )
    29  
    30  // PlainBooleanEncoder encodes bools as a bitmap as per the Plain Encoding
    31  type PlainBooleanEncoder struct {
    32  	encoder
    33  	bitsBuffer []byte
    34  	wr         utils.BitmapWriter
    35  }
    36  
    37  // Type for the PlainBooleanEncoder is parquet.Types.Boolean
    38  func (PlainBooleanEncoder) Type() parquet.Type {
    39  	return parquet.Types.Boolean
    40  }
    41  
    42  // Put encodes the contents of in into the underlying data buffer.
    43  func (enc *PlainBooleanEncoder) Put(in []bool) {
    44  	if enc.bitsBuffer == nil {
    45  		enc.bitsBuffer = make([]byte, boolBufSize)
    46  	}
    47  	if enc.wr == nil {
    48  		enc.wr = utils.NewBitmapWriter(enc.bitsBuffer, 0, boolsInBuf)
    49  	}
    50  	if len(in) == 0 {
    51  		return
    52  	}
    53  
    54  	n := enc.wr.AppendBools(in)
    55  	for n < len(in) {
    56  		enc.wr.Finish()
    57  		enc.append(enc.bitsBuffer)
    58  		enc.wr.Reset(0, boolsInBuf)
    59  		in = in[n:]
    60  		n = enc.wr.AppendBools(in)
    61  	}
    62  }
    63  
    64  // PutSpaced will use the validBits bitmap to determine which values are nulls
    65  // and can be left out from the slice, and the encoded without those nulls.
    66  func (enc *PlainBooleanEncoder) PutSpaced(in []bool, validBits []byte, validBitsOffset int64) {
    67  	bufferOut := make([]bool, len(in))
    68  	nvalid := spacedCompress(in, bufferOut, validBits, validBitsOffset)
    69  	enc.Put(bufferOut[:nvalid])
    70  }
    71  
    72  // EstimatedDataEncodedSize returns the current number of bytes that have
    73  // been buffered so far
    74  func (enc *PlainBooleanEncoder) EstimatedDataEncodedSize() int64 {
    75  	return int64(enc.sink.Len() + int(bitutil.BytesForBits(int64(enc.wr.Pos()))))
    76  }
    77  
    78  // FlushValues returns the buffered data, the responsibility is on the caller
    79  // to release the buffer memory
    80  func (enc *PlainBooleanEncoder) FlushValues() (Buffer, error) {
    81  	if enc.wr.Pos() > 0 {
    82  		toFlush := int(enc.wr.Pos())
    83  		enc.append(enc.bitsBuffer[:bitutil.BytesForBits(int64(toFlush))])
    84  	}
    85  
    86  	enc.wr.Reset(0, boolsInBuf)
    87  
    88  	return enc.sink.Finish(), nil
    89  }