github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/roaringset/helpers.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package roaringset 13 14 import ( 15 "runtime" 16 "sync" 17 18 "github.com/sirupsen/logrus" 19 enterrors "github.com/weaviate/weaviate/entities/errors" 20 21 "github.com/weaviate/sroar" 22 ) 23 24 var ( 25 prefillBufferSize = 65_536 26 prefillMaxRoutines = 4 27 _NUMCPU = runtime.NumCPU() 28 ) 29 30 func NewBitmap(values ...uint64) *sroar.Bitmap { 31 bm := sroar.NewBitmap() 32 bm.SetMany(values) 33 return bm 34 } 35 36 // Operations on bitmaps may result in oversized instances in relation to 37 // number of elements currently contained in bitmap 38 // Examples of such operations: 39 // - And-ing bitmaps may results in size being sum of both sizes 40 // (especially and-ing bitmap with itself) 41 // - Removing elements from bitmap results in size not being reduced 42 // (even if there is only few or no elements left) 43 // 44 // Method should be used before saving bitmap to file, to ensure 45 // minimal required size 46 // 47 // For most cases Or between empty bitmap and used bitmap 48 // works pretty well for reducing its final size, except for use case, 49 // where used bitmap uses internally bitmap - it will not be converted 50 // to underlying array, even if there are single elements left 51 func Condense(bm *sroar.Bitmap) *sroar.Bitmap { 52 condensed := sroar.NewBitmap() 53 condensed.Or(bm) 54 return condensed 55 } 56 57 // NewInvertedBitmap creates a bitmap that as all IDs filled from 0 to maxVal. 58 // Then the source bitmap is subtracted (AndNot) from the all-ids bitmap, 59 // resulting in a bitmap containing all ids from 0 to maxVal except the ones 60 // that were set on the source. 61 func NewInvertedBitmap(source *sroar.Bitmap, maxVal uint64, logger logrus.FieldLogger) *sroar.Bitmap { 62 bm := NewBitmapPrefill(maxVal, logger) 63 bm.AndNot(source) 64 return bm 65 } 66 67 // Creates prefilled bitmap with values from 0 to maxVal (included). 68 // 69 // It is designed to be more performant both 70 // time-wise (compared to Set/SetMany) 71 // and memory-wise (compared to FromSortedList accepting entire slice of elements) 72 // Method creates multiple small bitmaps using FromSortedList (slice is reusable) 73 // and ORs them together to get final bitmap. 74 // For maxVal > prefillBufferSize (65_536) and multiple CPUs available task is performed 75 // by up to prefillMaxRoutines (4) goroutines. 76 func NewBitmapPrefill(maxVal uint64, logger logrus.FieldLogger) *sroar.Bitmap { 77 routinesLimit := prefillMaxRoutines 78 if _NUMCPU < routinesLimit { 79 routinesLimit = _NUMCPU 80 } 81 if routinesLimit == 1 || maxVal <= uint64(prefillBufferSize) { 82 return newBitmapPrefillSequential(maxVal) 83 } 84 return newBitmapPrefillParallel(maxVal, routinesLimit, logger) 85 } 86 87 func newBitmapPrefillSequential(maxVal uint64) *sroar.Bitmap { 88 inc := uint64(prefillBufferSize) 89 buf := make([]uint64, prefillBufferSize) 90 finalBM := sroar.NewBitmap() 91 92 for i := uint64(0); i <= maxVal; i += inc { 93 j := uint64(0) 94 for ; j < inc && i+j <= maxVal; j++ { 95 buf[j] = i + j 96 } 97 finalBM.Or(sroar.FromSortedList(buf[:j])) 98 } 99 return finalBM 100 } 101 102 func newBitmapPrefillParallel(maxVal uint64, routinesLimit int, logger logrus.FieldLogger) *sroar.Bitmap { 103 inc := uint64(prefillBufferSize / routinesLimit) 104 lock := new(sync.Mutex) 105 ch := make(chan uint64, routinesLimit) 106 wg := new(sync.WaitGroup) 107 wg.Add(routinesLimit) 108 finalBM := sroar.NewBitmap() 109 110 for r := 0; r < routinesLimit; r++ { 111 f := func() { 112 buf := make([]uint64, inc) 113 114 for i := range ch { 115 j := uint64(0) 116 for ; j < inc && i+j <= maxVal; j++ { 117 buf[j] = i + j 118 } 119 bm := sroar.FromSortedList(buf[:j]) 120 121 lock.Lock() 122 finalBM.Or(bm) 123 lock.Unlock() 124 } 125 wg.Done() 126 } 127 enterrors.GoWrapper(f, logger) 128 } 129 130 for i := uint64(0); i <= maxVal; i += inc { 131 ch <- i 132 } 133 close(ch) 134 wg.Wait() 135 return finalBM 136 } 137 138 type MaxValGetterFunc func() uint64 139 140 const ( 141 // DefaultBufferIncrement is the amount of bits greater than <maxVal> 142 // to reduce the amount of times BitmapFactory has to reallocate. 143 DefaultBufferIncrement = uint64(100) 144 ) 145 146 // BitmapFactory exists to prevent an expensive call to 147 // NewBitmapPrefill each time NewInvertedBitmap is invoked 148 type BitmapFactory struct { 149 bitmap *sroar.Bitmap 150 maxValGetter MaxValGetterFunc 151 currentMaxVal uint64 152 lock sync.RWMutex 153 } 154 155 func NewBitmapFactory(maxValGetter MaxValGetterFunc, logger logrus.FieldLogger) *BitmapFactory { 156 maxVal := maxValGetter() + DefaultBufferIncrement 157 return &BitmapFactory{ 158 bitmap: NewBitmapPrefill(maxVal, logger), 159 maxValGetter: maxValGetter, 160 currentMaxVal: maxVal, 161 } 162 } 163 164 // GetBitmap returns a prefilled bitmap, which is cloned from a shared internal. 165 // This method is safe to call concurrently. The purpose behind sharing an 166 // internal bitmap, is that a Clone() operation is much cheaper than prefilling 167 // a map up to <maxDocID> elements is an expensive operation, and this way we 168 // only have to do it once. 169 func (bmf *BitmapFactory) GetBitmap() *sroar.Bitmap { 170 bmf.lock.RLock() 171 maxVal := bmf.maxValGetter() 172 173 // We don't need to expand, maxVal is unchanged 174 { 175 if maxVal <= bmf.currentMaxVal { 176 cloned := bmf.bitmap.Clone() 177 bmf.lock.RUnlock() 178 return cloned 179 } 180 } 181 182 bmf.lock.RUnlock() 183 bmf.lock.Lock() 184 defer bmf.lock.Unlock() 185 186 // 2nd check to ensure bitmap wasn't expanded by 187 // concurrent request white waiting for write lock 188 { 189 maxVal = bmf.maxValGetter() 190 if maxVal <= bmf.currentMaxVal { 191 return bmf.bitmap.Clone() 192 } 193 } 194 195 // maxVal has grown to exceed even the buffer, 196 // time to expand 197 { 198 length := maxVal + DefaultBufferIncrement - bmf.currentMaxVal 199 list := make([]uint64, length) 200 for i := uint64(0); i < length; i++ { 201 list[i] = bmf.currentMaxVal + i + 1 202 } 203 204 bmf.bitmap.Or(sroar.FromSortedList(list)) 205 bmf.currentMaxVal = maxVal + DefaultBufferIncrement 206 } 207 208 return bmf.bitmap.Clone() 209 } 210 211 // ActualMaxVal returns the highest value in the bitmap not including the buffer 212 func (bmf *BitmapFactory) ActualMaxVal() uint64 { 213 bmf.lock.RLock() 214 defer bmf.lock.RUnlock() 215 return bmf.maxValGetter() 216 }