github.com/apache/arrow/go/v14@v14.0.2/parquet/internal/encoding/boolean_decoder.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package encoding
    18  
    19  import (
    20  	"github.com/apache/arrow/go/v14/arrow/bitutil"
    21  	shared_utils "github.com/apache/arrow/go/v14/internal/utils"
    22  	"github.com/apache/arrow/go/v14/parquet"
    23  	"github.com/apache/arrow/go/v14/parquet/internal/utils"
    24  	"golang.org/x/xerrors"
    25  )
    26  
    27  // PlainBooleanDecoder is for the Plain Encoding type, there is no
    28  // dictionary decoding for bools.
    29  type PlainBooleanDecoder struct {
    30  	decoder
    31  
    32  	bitOffset int
    33  }
    34  
    35  // Type for the PlainBooleanDecoder is parquet.Types.Boolean
    36  func (PlainBooleanDecoder) Type() parquet.Type {
    37  	return parquet.Types.Boolean
    38  }
    39  
    40  func (dec *PlainBooleanDecoder) SetData(nvals int, data []byte) error {
    41  	if err := dec.decoder.SetData(nvals, data); err != nil {
    42  		return err
    43  	}
    44  	dec.bitOffset = 0
    45  	return nil
    46  }
    47  
    48  // Decode fills out with bools decoded from the data at the current point
    49  // or until we reach the end of the data.
    50  //
    51  // Returns the number of values decoded
    52  func (dec *PlainBooleanDecoder) Decode(out []bool) (int, error) {
    53  	max := shared_utils.MinInt(len(out), dec.nvals)
    54  
    55  	// attempts to read all remaining bool values from the current data byte
    56  	unalignedExtract := func(i int) int {
    57  		for ; dec.bitOffset < 8 && i < max; i, dec.bitOffset = i+1, dec.bitOffset+1 {
    58  			out[i] = (dec.data[0] & byte(1<<dec.bitOffset)) != 0
    59  		}
    60  		if dec.bitOffset == 8 {
    61  			// we read every bit from this byte
    62  			dec.bitOffset = 0
    63  			dec.data = dec.data[1:] // move data forward
    64  		}
    65  		return i // return the next index for out[]
    66  	}
    67  
    68  	// if we aren't at a byte boundary, then get bools until we hit
    69  	// a byte boundary with the bit offset.
    70  	i := 0
    71  	if dec.bitOffset != 0 {
    72  		i = unalignedExtract(i)
    73  	}
    74  
    75  	// determine the number of full bytes worth of bits we can decode
    76  	// given the number of values we want to decode.
    77  	bitsRemain := max - i
    78  	batch := (bitsRemain / 8) * 8
    79  	if batch > 0 { // only go in here if there's at least one full byte to decode
    80  		// determine the number of aligned bytes we can grab using SIMD optimized
    81  		// functions to improve performance.
    82  		alignedBytes := bitutil.BytesForBits(int64(batch))
    83  		utils.BytesToBools(dec.data[:alignedBytes], out[i:])
    84  
    85  		dec.data = dec.data[alignedBytes:] // move data forward
    86  		i += int(alignedBytes) * 8
    87  	}
    88  
    89  	// grab any trailing bits now that we've got our aligned bytes.
    90  	_ = unalignedExtract(i)
    91  
    92  	dec.nvals -= max
    93  	return max, nil
    94  }
    95  
    96  // DecodeSpaced is like Decode except it expands the values to leave spaces for null
    97  // as determined by the validBits bitmap.
    98  func (dec *PlainBooleanDecoder) DecodeSpaced(out []bool, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
    99  	if nullCount > 0 {
   100  		toRead := len(out) - nullCount
   101  		valuesRead, err := dec.Decode(out[:toRead])
   102  		if err != nil {
   103  			return 0, err
   104  		}
   105  		if valuesRead != toRead {
   106  			return valuesRead, xerrors.New("parquet: boolean decoder: number of values / definition levels read did not match")
   107  		}
   108  		return spacedExpand(out, nullCount, validBits, validBitsOffset), nil
   109  	}
   110  	return dec.Decode(out)
   111  }