github.com/apache/arrow/go/v14@v14.0.1/internal/bitutils/bit_run_reader.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one 2 // or more contributor license agreements. See the NOTICE file 3 // distributed with this work for additional information 4 // regarding copyright ownership. The ASF licenses this file 5 // to you under the Apache License, Version 2.0 (the 6 // "License"); you may not use this file except in compliance 7 // with the License. You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package bitutils 18 19 import ( 20 "encoding/binary" 21 "fmt" 22 "math/bits" 23 "unsafe" 24 25 "github.com/apache/arrow/go/v14/arrow" 26 "github.com/apache/arrow/go/v14/arrow/bitutil" 27 "github.com/apache/arrow/go/v14/internal/utils" 28 ) 29 30 // BitRun represents a run of bits with the same value of length Len 31 // with Set representing if the group of bits were 1 or 0. 32 type BitRun struct { 33 Len int64 34 Set bool 35 } 36 37 // BitRunReader is an interface that is usable by multiple callers to provide 38 // multiple types of bit run readers such as a reverse reader and so on. 39 // 40 // It's a convenience interface for counting contiguous set/unset bits in a bitmap. 41 // In places where BitBlockCounter can be used, then it would be preferred to use that 42 // as it would be faster than using BitRunReader. 43 type BitRunReader interface { 44 NextRun() BitRun 45 } 46 47 func (b BitRun) String() string { 48 return fmt.Sprintf("{Length: %d, set=%t}", b.Len, b.Set) 49 } 50 51 type bitRunReader struct { 52 bitmap []byte 53 pos int64 54 length int64 55 word uint64 56 curRunBitSet bool 57 } 58 59 // NewBitRunReader returns a reader for the given bitmap, offset and length that 60 // grabs runs of the same value bit at a time for easy iteration. 61 func NewBitRunReader(bitmap []byte, offset int64, length int64) BitRunReader { 62 ret := &bitRunReader{ 63 bitmap: bitmap[offset/8:], 64 pos: offset % 8, 65 length: (offset % 8) + length, 66 } 67 68 if length == 0 { 69 return ret 70 } 71 72 ret.curRunBitSet = bitutil.BitIsNotSet(bitmap, int(offset)) 73 bitsRemaining := length + ret.pos 74 ret.loadWord(bitsRemaining) 75 ret.word = ret.word &^ LeastSignificantBitMask(ret.pos) 76 return ret 77 } 78 79 // NextRun returns a new BitRun containing the number of contiguous bits with the 80 // same value. Len == 0 indicates the end of the bitmap. 81 func (b *bitRunReader) NextRun() BitRun { 82 if b.pos >= b.length { 83 return BitRun{0, false} 84 } 85 86 // This implementation relies on a efficient implementations of 87 // CountTrailingZeros and assumes that runs are more often then 88 // not. The logic is to incrementally find the next bit change 89 // from the current position. This is done by zeroing all 90 // bits in word_ up to position_ and using the TrailingZeroCount 91 // to find the index of the next set bit. 92 93 // The runs alternate on each call, so flip the bit. 94 b.curRunBitSet = !b.curRunBitSet 95 96 start := b.pos 97 startOffset := start & 63 98 99 // Invert the word for proper use of CountTrailingZeros and 100 // clear bits so CountTrailingZeros can do it magic. 101 b.word = ^b.word &^ LeastSignificantBitMask(startOffset) 102 103 // Go forward until the next change from unset to set. 104 newbits := int64(bits.TrailingZeros64(b.word)) - startOffset 105 b.pos += newbits 106 107 if IsMultipleOf64(b.pos) && b.pos < b.length { 108 b.advanceUntilChange() 109 } 110 return BitRun{b.pos - start, b.curRunBitSet} 111 } 112 113 func (b *bitRunReader) advanceUntilChange() { 114 newbits := int64(0) 115 for { 116 b.bitmap = b.bitmap[arrow.Uint64SizeBytes:] 117 b.loadNextWord() 118 newbits = int64(bits.TrailingZeros64(b.word)) 119 b.pos += newbits 120 if !IsMultipleOf64(b.pos) || b.pos >= b.length || newbits <= 0 { 121 break 122 } 123 } 124 } 125 126 func (b *bitRunReader) loadNextWord() { 127 b.loadWord(b.length - b.pos) 128 } 129 130 func (b *bitRunReader) loadWord(bitsRemaining int64) { 131 b.word = 0 132 if bitsRemaining >= 64 { 133 b.word = binary.LittleEndian.Uint64(b.bitmap) 134 } else { 135 nbytes := bitutil.BytesForBits(bitsRemaining) 136 wordptr := (*(*[8]byte)(unsafe.Pointer(&b.word)))[:] 137 copy(wordptr, b.bitmap[:nbytes]) 138 139 bitutil.SetBitTo(wordptr, int(bitsRemaining), bitutil.BitIsNotSet(wordptr, int(bitsRemaining-1))) 140 // reset the value to little endian for big endian architectures 141 b.word = utils.ToLEUint64(b.word) 142 } 143 144 // Two cases: 145 // 1. For unset, CountTrailingZeros works naturally so we don't 146 // invert the word. 147 // 2. Otherwise invert so we can use CountTrailingZeros. 148 if b.curRunBitSet { 149 b.word = ^b.word 150 } 151 }