github.com/apache/arrow/go/v14@v14.0.1/parquet/internal/encoding/boolean_decoder.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package encoding 18 19 import ( 20 "github.com/apache/arrow/go/v14/arrow/bitutil" 21 shared_utils "github.com/apache/arrow/go/v14/internal/utils" 22 "github.com/apache/arrow/go/v14/parquet" 23 "github.com/apache/arrow/go/v14/parquet/internal/utils" 24 "golang.org/x/xerrors" 25 ) 26 27 // PlainBooleanDecoder is for the Plain Encoding type, there is no 28 // dictionary decoding for bools. 29 type PlainBooleanDecoder struct { 30 decoder 31 32 bitOffset int 33 } 34 35 // Type for the PlainBooleanDecoder is parquet.Types.Boolean 36 func (PlainBooleanDecoder) Type() parquet.Type { 37 return parquet.Types.Boolean 38 } 39 40 func (dec *PlainBooleanDecoder) SetData(nvals int, data []byte) error { 41 if err := dec.decoder.SetData(nvals, data); err != nil { 42 return err 43 } 44 dec.bitOffset = 0 45 return nil 46 } 47 48 // Decode fills out with bools decoded from the data at the current point 49 // or until we reach the end of the data. 50 // 51 // Returns the number of values decoded 52 func (dec *PlainBooleanDecoder) Decode(out []bool) (int, error) { 53 max := shared_utils.MinInt(len(out), dec.nvals) 54 55 // attempts to read all remaining bool values from the current data byte 56 unalignedExtract := func(i int) int { 57 for ; dec.bitOffset < 8 && i < max; i, dec.bitOffset = i+1, dec.bitOffset+1 { 58 out[i] = (dec.data[0] & byte(1<<dec.bitOffset)) != 0 59 } 60 if dec.bitOffset == 8 { 61 // we read every bit from this byte 62 dec.bitOffset = 0 63 dec.data = dec.data[1:] // move data forward 64 } 65 return i // return the next index for out[] 66 } 67 68 // if we aren't at a byte boundary, then get bools until we hit 69 // a byte boundary with the bit offset. 70 i := 0 71 if dec.bitOffset != 0 { 72 i = unalignedExtract(i) 73 } 74 75 // determine the number of full bytes worth of bits we can decode 76 // given the number of values we want to decode. 77 bitsRemain := max - i 78 batch := (bitsRemain / 8) * 8 79 if batch > 0 { // only go in here if there's at least one full byte to decode 80 // determine the number of aligned bytes we can grab using SIMD optimized 81 // functions to improve performance. 82 alignedBytes := bitutil.BytesForBits(int64(batch)) 83 utils.BytesToBools(dec.data[:alignedBytes], out[i:]) 84 85 dec.data = dec.data[alignedBytes:] // move data forward 86 i += int(alignedBytes) * 8 87 } 88 89 // grab any trailing bits now that we've got our aligned bytes. 90 _ = unalignedExtract(i) 91 92 dec.nvals -= max 93 return max, nil 94 } 95 96 // DecodeSpaced is like Decode except it expands the values to leave spaces for null 97 // as determined by the validBits bitmap. 98 func (dec *PlainBooleanDecoder) DecodeSpaced(out []bool, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { 99 if nullCount > 0 { 100 toRead := len(out) - nullCount 101 valuesRead, err := dec.Decode(out[:toRead]) 102 if err != nil { 103 return 0, err 104 } 105 if valuesRead != toRead { 106 return valuesRead, xerrors.New("parquet: boolean decoder: number of values / definition levels read did not match") 107 } 108 return spacedExpand(out, nullCount, validBits, validBitsOffset), nil 109 } 110 return dec.Decode(out) 111 }