github.com/apache/arrow/go/v14@v14.0.1/parquet/internal/utils/bitmap_writer.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package utils 18 19 import ( 20 "encoding/binary" 21 "math/bits" 22 23 "github.com/apache/arrow/go/v14/arrow/bitutil" 24 ) 25 26 // BitmapWriter is an interface for bitmap writers so that we can use multiple 27 // implementations or swap if necessary. 28 type BitmapWriter interface { 29 // Set sets the current bit that will be written 30 Set() 31 // Clear clears the current bit that will be written 32 Clear() 33 // Next advances to the next bit for the writer 34 Next() 35 // Finish flushes the current byte out to the bitmap slice 36 Finish() 37 // AppendWord takes nbits from word which should be an LSB bitmap and appends them to the bitmap. 38 AppendWord(word uint64, nbits int64) 39 // AppendBools appends the bit representation of the bools slice, returning the number 40 // of bools that were able to fit in the remaining length of the bitmapwriter. 41 AppendBools(in []bool) int 42 // Pos is the current position that will be written next 43 Pos() int 44 // Reset allows reusing the bitmapwriter by resetting Pos to start with length as 45 // the number of bits that the writer can write. 46 Reset(start, length int) 47 } 48 49 type bitmapWriter struct { 50 *bitutil.BitmapWriter 51 } 52 53 func NewBitmapWriter(bitmap []byte, start, length int) BitmapWriter { 54 return &bitmapWriter{bitutil.NewBitmapWriter(bitmap, start, length)} 55 } 56 57 func (b *bitmapWriter) AppendWord(uint64, int64) { 58 panic("unimplemented") 59 } 60 61 type firstTimeBitmapWriter struct { 62 buf []byte 63 pos int64 64 length int64 65 66 curByte uint8 67 bitMask uint8 68 byteOffset int64 69 endianBuffer [8]byte 70 } 71 72 // NewFirstTimeBitmapWriter creates a bitmap writer that might clobber any bit values 73 // following the bits written to the bitmap, as such it is faster than the bitmapwriter 74 // that is created with NewBitmapWriter 75 func NewFirstTimeBitmapWriter(buf []byte, start, length int64) BitmapWriter { 76 ret := &firstTimeBitmapWriter{ 77 buf: buf, 78 byteOffset: start / 8, 79 bitMask: bitutil.BitMask[start%8], 80 length: length, 81 } 82 if length > 0 { 83 ret.curByte = ret.buf[int(ret.byteOffset)] & bitutil.PrecedingBitmask[start%8] 84 } 85 return ret 86 } 87 88 func (bw *firstTimeBitmapWriter) Reset(start, length int) { 89 bw.pos = 0 90 bw.byteOffset = int64(start / 8) 91 bw.bitMask = bitutil.BitMask[start%8] 92 bw.length = int64(length) 93 if length > 0 { 94 bw.curByte = bw.buf[int(bw.byteOffset)] & bitutil.PrecedingBitmask[start%8] 95 } 96 } 97 98 func (bw *firstTimeBitmapWriter) Pos() int { return int(bw.pos) } 99 func (bw *firstTimeBitmapWriter) AppendWord(word uint64, nbits int64) { 100 if nbits == 0 { 101 return 102 } 103 104 // location that the first byte needs to be written to for appending 105 appslice := bw.buf[int(bw.byteOffset):] 106 107 // update everything but curByte 108 bw.pos += nbits 109 bitOffset := bits.TrailingZeros32(uint32(bw.bitMask)) 110 bw.bitMask = bitutil.BitMask[(int64(bitOffset)+nbits)%8] 111 bw.byteOffset += (int64(bitOffset) + nbits) / 8 112 113 if bitOffset != 0 { 114 // we're in the middle of the byte. Update the byte and shift bits appropriately 115 // so we can just copy the bytes. 116 carry := 8 - bitOffset 117 // Carry over bits from word to curByte. We assume any extra bits in word are unset 118 // so no additional accounting is needed for when nbits < carry 119 bw.curByte |= uint8((word & uint64(bitutil.PrecedingBitmask[carry])) << bitOffset) 120 // check everything was transferred to curByte 121 if nbits < int64(carry) { 122 return 123 } 124 appslice[0] = bw.curByte 125 appslice = appslice[1:] 126 // move the carry bits off of word 127 word = word >> carry 128 nbits -= int64(carry) 129 } 130 bytesForWord := bitutil.BytesForBits(nbits) 131 binary.LittleEndian.PutUint64(bw.endianBuffer[:], word) 132 copy(appslice, bw.endianBuffer[:bytesForWord]) 133 134 // at this point, the previous curByte has been written, the new curByte 135 // is either the last relevant byte in word or cleared if the new position 136 // is byte aligned (ie. a fresh byte) 137 if bw.bitMask == 0x1 { 138 bw.curByte = 0 139 } else { 140 bw.curByte = appslice[bytesForWord-1] 141 } 142 } 143 144 func (bw *firstTimeBitmapWriter) Set() { 145 bw.curByte |= bw.bitMask 146 } 147 148 func (bw *firstTimeBitmapWriter) Clear() {} 149 150 func (bw *firstTimeBitmapWriter) Next() { 151 bw.bitMask = uint8(bw.bitMask << 1) 152 bw.pos++ 153 if bw.bitMask == 0 { 154 // byte finished, advance to the next one 155 bw.bitMask = 0x1 156 bw.buf[int(bw.byteOffset)] = bw.curByte 157 bw.byteOffset++ 158 bw.curByte = 0 159 } 160 } 161 162 func (b *firstTimeBitmapWriter) AppendBools(in []bool) int { 163 panic("Append Bools not yet implemented for firstTimeBitmapWriter") 164 } 165 166 func (bw *firstTimeBitmapWriter) Finish() { 167 // store curByte into the bitmap 168 if bw.length > 0 && bw.bitMask != 0x01 || bw.pos < bw.length { 169 bw.buf[int(bw.byteOffset)] = bw.curByte 170 } 171 } 172 173 func (bw *firstTimeBitmapWriter) Position() int64 { return bw.pos }