github.com/apache/arrow/go/v7@v7.0.1/parquet/internal/encoding/boolean_decoder.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package encoding
    18  
    19  import (
    20  	"github.com/apache/arrow/go/v7/arrow/bitutil"
    21  	"github.com/apache/arrow/go/v7/parquet"
    22  	"github.com/apache/arrow/go/v7/parquet/internal/utils"
    23  	"golang.org/x/xerrors"
    24  )
    25  
    26  // PlainBooleanDecoder is for the Plain Encoding type, there is no
    27  // dictionary decoding for bools.
    28  type PlainBooleanDecoder struct {
    29  	decoder
    30  
    31  	bitOffset int
    32  }
    33  
    34  // Type for the PlainBooleanDecoder is parquet.Types.Boolean
    35  func (PlainBooleanDecoder) Type() parquet.Type {
    36  	return parquet.Types.Boolean
    37  }
    38  
    39  // Decode fills out with bools decoded from the data at the current point
    40  // or until we reach the end of the data.
    41  //
    42  // Returns the number of values decoded
    43  func (dec *PlainBooleanDecoder) Decode(out []bool) (int, error) {
    44  	max := utils.MinInt(len(out), dec.nvals)
    45  
    46  	unalignedExtract := func(start, end, curBitOffset int) int {
    47  		i := start
    48  		for ; curBitOffset < end && i < max; i, curBitOffset = i+1, curBitOffset+1 {
    49  			out[i] = (dec.data[0] & byte(1<<curBitOffset)) != 0
    50  		}
    51  		return i // return the number of bits we extracted
    52  	}
    53  
    54  	// if we aren't at a byte boundary, then get bools until we hit
    55  	// a byte boundary with the bit offset.
    56  	i := 0
    57  	if dec.bitOffset != 0 {
    58  		i = unalignedExtract(0, 8, dec.bitOffset)
    59  		dec.bitOffset = (dec.bitOffset + i) % 8
    60  	}
    61  
    62  	// determine the number of full bytes worth of bits we can decode
    63  	// given the number of values we want to decode.
    64  	bitsRemain := max - i
    65  	batch := bitsRemain / 8 * 8
    66  	if batch > 0 { // only go in here if there's at least one full byte to decode
    67  		if i > 0 { // skip our data forward if we decoded anything above
    68  			dec.data = dec.data[1:]
    69  			out = out[i:]
    70  		}
    71  		// determine the number of aligned bytes we can grab using SIMD optimized
    72  		// functions to improve performance.
    73  		alignedBytes := bitutil.BytesForBits(int64(batch))
    74  		utils.BytesToBools(dec.data[:alignedBytes], out)
    75  		dec.data = dec.data[alignedBytes:]
    76  		out = out[alignedBytes*8:]
    77  	}
    78  
    79  	// grab any trailing bits now that we've got our aligned bytes.
    80  	dec.bitOffset += unalignedExtract(dec.bitOffset, bitsRemain-batch, dec.bitOffset)
    81  
    82  	dec.nvals -= max
    83  	return max, nil
    84  }
    85  
    86  // DecodeSpaced is like Decode except it expands the values to leave spaces for null
    87  // as determined by the validBits bitmap.
    88  func (dec *PlainBooleanDecoder) DecodeSpaced(out []bool, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
    89  	if nullCount > 0 {
    90  		toRead := len(out) - nullCount
    91  		valuesRead, err := dec.Decode(out[:toRead])
    92  		if err != nil {
    93  			return 0, err
    94  		}
    95  		if valuesRead != toRead {
    96  			return valuesRead, xerrors.New("parquet: boolean decoder: number of values / definition levels read did not match")
    97  		}
    98  		return spacedExpand(out, nullCount, validBits, validBitsOffset), nil
    99  	}
   100  	return dec.Decode(out)
   101  }