github.com/apache/arrow/go/v14@v14.0.1/internal/bitutils/bit_set_run_reader.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package bitutils 18 19 import ( 20 "encoding/binary" 21 "math/bits" 22 23 "github.com/apache/arrow/go/v14/arrow/bitutil" 24 "github.com/apache/arrow/go/v14/internal/utils" 25 ) 26 27 // IsMultipleOf64 returns whether v is a multiple of 64. 28 func IsMultipleOf64(v int64) bool { return v&63 == 0 } 29 30 // LeastSignificantBitMask returns a bit mask to return the least significant 31 // bits for a value starting from the bit index passed in. ie: if you want a 32 // mask for the 4 least significant bits, you call LeastSignificantBitMask(4) 33 func LeastSignificantBitMask(index int64) uint64 { 34 return (uint64(1) << index) - 1 35 } 36 37 // SetBitRun describes a run of contiguous set bits in a bitmap with Pos being 38 // the starting position of the run and Length being the number of bits. 39 type SetBitRun struct { 40 Pos int64 41 Length int64 42 } 43 44 // AtEnd returns true if this bit run is the end of the set by checking 45 // that the length is 0. 46 func (s SetBitRun) AtEnd() bool { 47 return s.Length == 0 48 } 49 50 // Equal returns whether rhs is the same run as s 51 func (s SetBitRun) Equal(rhs SetBitRun) bool { 52 return s.Pos == rhs.Pos && s.Length == rhs.Length 53 } 54 55 // SetBitRunReader is an interface for reading groups of contiguous set bits 56 // from a bitmap. The interface allows us to create different reader implementations 57 // that share the same interface easily such as a reverse set reader. 58 type SetBitRunReader interface { 59 // NextRun will return the next run of contiguous set bits in the bitmap 60 NextRun() SetBitRun 61 // Reset allows re-using the reader by providing a new bitmap, offset and length. The arguments 62 // match the New function for the reader being used. 63 Reset([]byte, int64, int64) 64 // VisitSetBitRuns calls visitFn for each set in a loop starting from the current position 65 // it's roughly equivalent to simply looping, calling NextRun and calling visitFn on the run 66 // for each run. 67 VisitSetBitRuns(visitFn VisitFn) error 68 } 69 70 type baseSetBitRunReader struct { 71 bitmap []byte 72 pos int64 73 length int64 74 remaining int64 75 curWord uint64 76 curNumBits int32 77 reversed bool 78 79 firstBit uint64 80 } 81 82 // NewSetBitRunReader returns a SetBitRunReader for the bitmap starting at startOffset which will read 83 // numvalues bits. 84 func NewSetBitRunReader(validBits []byte, startOffset, numValues int64) SetBitRunReader { 85 return newBaseSetBitRunReader(validBits, startOffset, numValues, false) 86 } 87 88 // NewReverseSetBitRunReader returns a SetBitRunReader like NewSetBitRunReader, except it will 89 // return runs starting from the end of the bitmap until it reaches startOffset rather than starting 90 // at startOffset and reading from there. The SetBitRuns will still operate the same, so Pos 91 // will still be the position of the "left-most" bit of the run or the "start" of the run. It 92 // just returns runs starting from the end instead of starting from the beginning. 93 func NewReverseSetBitRunReader(validBits []byte, startOffset, numValues int64) SetBitRunReader { 94 return newBaseSetBitRunReader(validBits, startOffset, numValues, true) 95 } 96 97 func newBaseSetBitRunReader(bitmap []byte, startOffset, length int64, reverse bool) *baseSetBitRunReader { 98 ret := &baseSetBitRunReader{reversed: reverse} 99 ret.Reset(bitmap, startOffset, length) 100 return ret 101 } 102 103 func (br *baseSetBitRunReader) Reset(bitmap []byte, startOffset, length int64) { 104 br.bitmap = bitmap 105 br.length = length 106 br.remaining = length 107 br.curNumBits = 0 108 br.curWord = 0 109 110 if !br.reversed { 111 br.pos = startOffset / 8 112 br.firstBit = 1 113 114 bitOffset := int8(startOffset % 8) 115 if length > 0 && bitOffset != 0 { 116 br.curNumBits = int32(utils.MinInt(int(length), int(8-bitOffset))) 117 br.curWord = br.loadPartial(bitOffset, int64(br.curNumBits)) 118 } 119 return 120 } 121 122 br.pos = (startOffset + length) / 8 123 br.firstBit = uint64(0x8000000000000000) 124 endBitOffset := int8((startOffset + length) % 8) 125 if length > 0 && endBitOffset != 0 { 126 br.pos++ 127 br.curNumBits = int32(utils.MinInt(int(length), int(endBitOffset))) 128 br.curWord = br.loadPartial(8-endBitOffset, int64(br.curNumBits)) 129 } 130 } 131 132 func (br *baseSetBitRunReader) consumeBits(word uint64, nbits int32) uint64 { 133 if br.reversed { 134 return word << nbits 135 } 136 return word >> nbits 137 } 138 139 func (br *baseSetBitRunReader) countFirstZeros(word uint64) int32 { 140 if br.reversed { 141 return int32(bits.LeadingZeros64(word)) 142 } 143 return int32(bits.TrailingZeros64(word)) 144 } 145 146 func (br *baseSetBitRunReader) loadPartial(bitOffset int8, numBits int64) uint64 { 147 var word [8]byte 148 nbytes := bitutil.BytesForBits(numBits) 149 if br.reversed { 150 br.pos -= nbytes 151 copy(word[8-nbytes:], br.bitmap[br.pos:br.pos+nbytes]) 152 return (binary.LittleEndian.Uint64(word[:]) << bitOffset) &^ LeastSignificantBitMask(64-numBits) 153 } 154 155 copy(word[:], br.bitmap[br.pos:br.pos+nbytes]) 156 br.pos += nbytes 157 return (binary.LittleEndian.Uint64(word[:]) >> bitOffset) & LeastSignificantBitMask(numBits) 158 } 159 160 func (br *baseSetBitRunReader) findCurrentRun() SetBitRun { 161 nzeros := br.countFirstZeros(br.curWord) 162 if nzeros >= br.curNumBits { 163 br.remaining -= int64(br.curNumBits) 164 br.curWord = 0 165 br.curNumBits = 0 166 return SetBitRun{0, 0} 167 } 168 169 br.curWord = br.consumeBits(br.curWord, nzeros) 170 br.curNumBits -= nzeros 171 br.remaining -= int64(nzeros) 172 pos := br.position() 173 174 numOnes := br.countFirstZeros(^br.curWord) 175 br.curWord = br.consumeBits(br.curWord, numOnes) 176 br.curNumBits -= numOnes 177 br.remaining -= int64(numOnes) 178 return SetBitRun{pos, int64(numOnes)} 179 } 180 181 func (br *baseSetBitRunReader) position() int64 { 182 if br.reversed { 183 return br.remaining 184 } 185 return br.length - br.remaining 186 } 187 188 func (br *baseSetBitRunReader) adjustRun(run SetBitRun) SetBitRun { 189 if br.reversed { 190 run.Pos -= run.Length 191 } 192 return run 193 } 194 195 func (br *baseSetBitRunReader) loadFull() (ret uint64) { 196 if br.reversed { 197 br.pos -= 8 198 } 199 ret = binary.LittleEndian.Uint64(br.bitmap[br.pos : br.pos+8]) 200 if !br.reversed { 201 br.pos += 8 202 } 203 return 204 } 205 206 func (br *baseSetBitRunReader) skipNextZeros() { 207 for br.remaining >= 64 { 208 br.curWord = br.loadFull() 209 nzeros := br.countFirstZeros(br.curWord) 210 if nzeros < 64 { 211 br.curWord = br.consumeBits(br.curWord, nzeros) 212 br.curNumBits = 64 - nzeros 213 br.remaining -= int64(nzeros) 214 return 215 } 216 br.remaining -= 64 217 } 218 // run of zeros continues in last bitmap word 219 if br.remaining > 0 { 220 br.curWord = br.loadPartial(0, br.remaining) 221 br.curNumBits = int32(br.remaining) 222 nzeros := int32(utils.MinInt(int(br.curNumBits), int(br.countFirstZeros(br.curWord)))) 223 br.curWord = br.consumeBits(br.curWord, nzeros) 224 br.curNumBits -= nzeros 225 br.remaining -= int64(nzeros) 226 } 227 } 228 229 func (br *baseSetBitRunReader) countNextOnes() int64 { 230 var length int64 231 if ^br.curWord != 0 { 232 numOnes := br.countFirstZeros(^br.curWord) 233 br.remaining -= int64(numOnes) 234 br.curWord = br.consumeBits(br.curWord, numOnes) 235 br.curNumBits -= numOnes 236 if br.curNumBits != 0 { 237 return int64(numOnes) 238 } 239 length = int64(numOnes) 240 } else { 241 br.remaining -= 64 242 br.curNumBits = 0 243 length = 64 244 } 245 246 for br.remaining >= 64 { 247 br.curWord = br.loadFull() 248 numOnes := br.countFirstZeros(^br.curWord) 249 length += int64(numOnes) 250 br.remaining -= int64(numOnes) 251 if numOnes < 64 { 252 br.curWord = br.consumeBits(br.curWord, numOnes) 253 br.curNumBits = 64 - numOnes 254 return length 255 } 256 } 257 258 if br.remaining > 0 { 259 br.curWord = br.loadPartial(0, br.remaining) 260 br.curNumBits = int32(br.remaining) 261 numOnes := br.countFirstZeros(^br.curWord) 262 br.curWord = br.consumeBits(br.curWord, numOnes) 263 br.curNumBits -= numOnes 264 br.remaining -= int64(numOnes) 265 length += int64(numOnes) 266 } 267 return length 268 } 269 270 func (br *baseSetBitRunReader) NextRun() SetBitRun { 271 var ( 272 pos int64 = 0 273 length int64 = 0 274 ) 275 276 if br.curNumBits != 0 { 277 run := br.findCurrentRun() 278 if run.Length != 0 && br.curNumBits != 0 { 279 return br.adjustRun(run) 280 } 281 pos = run.Pos 282 length = run.Length 283 } 284 285 if length == 0 { 286 // we didn't get any ones in curWord, so we can skip any zeros 287 // in the following words 288 br.skipNextZeros() 289 if br.remaining == 0 { 290 return SetBitRun{0, 0} 291 } 292 pos = br.position() 293 } else if br.curNumBits == 0 { 294 if br.remaining >= 64 { 295 br.curWord = br.loadFull() 296 br.curNumBits = 64 297 } else if br.remaining > 0 { 298 br.curWord = br.loadPartial(0, br.remaining) 299 br.curNumBits = int32(br.remaining) 300 } else { 301 return br.adjustRun(SetBitRun{pos, length}) 302 } 303 if (br.curWord & br.firstBit) == 0 { 304 return br.adjustRun(SetBitRun{pos, length}) 305 } 306 } 307 308 length += br.countNextOnes() 309 return br.adjustRun(SetBitRun{pos, length}) 310 } 311 312 // VisitFn is a callback function for visiting runs of contiguous bits 313 type VisitFn func(pos int64, length int64) error 314 315 func (br *baseSetBitRunReader) VisitSetBitRuns(visitFn VisitFn) error { 316 for { 317 run := br.NextRun() 318 if run.Length == 0 { 319 break 320 } 321 322 if err := visitFn(run.Pos, run.Length); err != nil { 323 return err 324 } 325 } 326 return nil 327 } 328 329 // VisitSetBitRuns is just a convenience function for calling NewSetBitRunReader and then VisitSetBitRuns 330 func VisitSetBitRuns(bitmap []byte, bitmapOffset int64, length int64, visitFn VisitFn) error { 331 if bitmap == nil { 332 return visitFn(0, length) 333 } 334 rdr := NewSetBitRunReader(bitmap, bitmapOffset, length) 335 for { 336 run := rdr.NextRun() 337 if run.Length == 0 { 338 break 339 } 340 341 if err := visitFn(run.Pos, run.Length); err != nil { 342 return err 343 } 344 } 345 return nil 346 } 347 348 func VisitSetBitRunsNoErr(bitmap []byte, bitmapOffset int64, length int64, visitFn func(pos, length int64)) { 349 if bitmap == nil { 350 visitFn(0, length) 351 return 352 } 353 rdr := NewSetBitRunReader(bitmap, bitmapOffset, length) 354 for { 355 run := rdr.NextRun() 356 if run.Length == 0 { 357 break 358 } 359 visitFn(run.Pos, run.Length) 360 } 361 }