// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package encoding

import (
	"github.com/apache/arrow/go/v7/arrow/bitutil"
	"github.com/apache/arrow/go/v7/parquet"
	"github.com/apache/arrow/go/v7/parquet/internal/utils"
	"golang.org/x/xerrors"
)

// PlainBooleanDecoder is for the Plain Encoding type, there is no
// dictionary decoding for bools.
type PlainBooleanDecoder struct {
	// embedded base decoder, declared elsewhere in this package. Decode reads
	// dec.data and dec.nvals, which presumably are promoted from it.
	decoder

	// bitOffset is the bit position inside the first byte of the remaining
	// data at which Decode reads the next value. Zero means the decoder is
	// sitting on a byte boundary.
	bitOffset int
}

// Type reports the physical type this decoder produces, which is always
// parquet.Types.Boolean.
func (PlainBooleanDecoder) Type() parquet.Type {
	return parquet.Types.Boolean
}

// Decode fills out with bools decoded from the data at the current point
// or until we reach the end of the data.
41 // 42 // Returns the number of values decoded 43 func (dec *PlainBooleanDecoder) Decode(out []bool) (int, error) { 44 max := utils.MinInt(len(out), dec.nvals) 45 46 unalignedExtract := func(start, end, curBitOffset int) int { 47 i := start 48 for ; curBitOffset < end && i < max; i, curBitOffset = i+1, curBitOffset+1 { 49 out[i] = (dec.data[0] & byte(1<<curBitOffset)) != 0 50 } 51 return i // return the number of bits we extracted 52 } 53 54 // if we aren't at a byte boundary, then get bools until we hit 55 // a byte boundary with the bit offset. 56 i := 0 57 if dec.bitOffset != 0 { 58 i = unalignedExtract(0, 8, dec.bitOffset) 59 dec.bitOffset = (dec.bitOffset + i) % 8 60 } 61 62 // determine the number of full bytes worth of bits we can decode 63 // given the number of values we want to decode. 64 bitsRemain := max - i 65 batch := bitsRemain / 8 * 8 66 if batch > 0 { // only go in here if there's at least one full byte to decode 67 if i > 0 { // skip our data forward if we decoded anything above 68 dec.data = dec.data[1:] 69 out = out[i:] 70 } 71 // determine the number of aligned bytes we can grab using SIMD optimized 72 // functions to improve performance. 73 alignedBytes := bitutil.BytesForBits(int64(batch)) 74 utils.BytesToBools(dec.data[:alignedBytes], out) 75 dec.data = dec.data[alignedBytes:] 76 out = out[alignedBytes*8:] 77 } 78 79 // grab any trailing bits now that we've got our aligned bytes. 80 dec.bitOffset += unalignedExtract(dec.bitOffset, bitsRemain-batch, dec.bitOffset) 81 82 dec.nvals -= max 83 return max, nil 84 } 85 86 // DecodeSpaced is like Decode except it expands the values to leave spaces for null 87 // as determined by the validBits bitmap. 
88 func (dec *PlainBooleanDecoder) DecodeSpaced(out []bool, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { 89 if nullCount > 0 { 90 toRead := len(out) - nullCount 91 valuesRead, err := dec.Decode(out[:toRead]) 92 if err != nil { 93 return 0, err 94 } 95 if valuesRead != toRead { 96 return valuesRead, xerrors.New("parquet: boolean decoder: number of values / definition levels read did not match") 97 } 98 return spacedExpand(out, nullCount, validBits, validBitsOffset), nil 99 } 100 return dec.Decode(out) 101 }