github.com/ledgerwatch/erigon-lib@v1.0.0/kv/bitmapdb/bitmapdb.go (about) 1 /* 2 Copyright 2022 The Erigon contributors 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package bitmapdb 18 19 import ( 20 "bytes" 21 "encoding/binary" 22 "math" 23 "sort" 24 "sync" 25 26 "github.com/RoaringBitmap/roaring" 27 "github.com/RoaringBitmap/roaring/roaring64" 28 "github.com/c2h5oh/datasize" 29 30 "github.com/ledgerwatch/erigon-lib/common" 31 "github.com/ledgerwatch/erigon-lib/kv" 32 ) 33 34 const MaxUint32 = 1<<32 - 1 35 36 type ToBitamp interface { 37 ToBitmap() (*roaring64.Bitmap, error) 38 } 39 40 var roaringPool = sync.Pool{ 41 New: func() any { 42 return roaring.New() 43 }, 44 } 45 46 func NewBitmap() *roaring.Bitmap { 47 a := roaringPool.Get().(*roaring.Bitmap) 48 a.Clear() 49 return a 50 } 51 func ReturnToPool(a *roaring.Bitmap) { 52 if a == nil { 53 return 54 } 55 roaringPool.Put(a) 56 } 57 58 var roaring64Pool = sync.Pool{ 59 New: func() any { 60 return roaring64.New() 61 }, 62 } 63 64 func NewBitmap64() *roaring64.Bitmap { 65 a := roaring64Pool.Get().(*roaring64.Bitmap) 66 a.Clear() 67 return a 68 } 69 func ReturnToPool64(a *roaring64.Bitmap) { 70 if a == nil { 71 return 72 } 73 roaring64Pool.Put(a) 74 } 75 76 const ChunkLimit = uint64(1950 * datasize.B) // threshold beyond which MDBX overflow pages appear: 4096 / 2 - (keySize + 8) 77 78 // CutLeft - cut from bitmap `targetSize` bytes from left 79 // removing lft part from `bm` 80 // returns nil on zero cardinality 81 func CutLeft(bm *roaring.Bitmap, sizeLimit uint64) *roaring.Bitmap { 82 if bm.GetCardinality() == 0 { 83 return nil 84 } 85 86 sz := bm.GetSerializedSizeInBytes() 87 if sz <= sizeLimit { 88 lft := roaring.New() 89 lft.AddRange(uint64(bm.Minimum()), uint64(bm.Maximum())+1) 90 lft.And(bm) 91 lft.RunOptimize() 92 bm.Clear() 93 return lft 94 } 95 96 from := uint64(bm.Minimum()) 97 minMax := bm.Maximum() - bm.Minimum() 98 to := sort.Search(int(minMax), func(i int) bool { // can be optimized to avoid "too small steps", but let's leave it for readability 99 lft := roaring.New() // bitmap.Clear() method intentionally not used here, because then serialized size of bitmap getting bigger 100 lft.AddRange(from, from+uint64(i)+1) 101 lft.And(bm) 102 lft.RunOptimize() 103 return lft.GetSerializedSizeInBytes() > sizeLimit 104 }) 105 106 lft := roaring.New() 107 lft.AddRange(from, from+uint64(to)) // no +1 because sort.Search returns element which is just higher threshold - but we need lower 108 lft.And(bm) 109 bm.RemoveRange(from, from+uint64(to)) 110 lft.RunOptimize() 111 return lft 112 } 113 114 func WalkChunks(bm *roaring.Bitmap, sizeLimit uint64, f func(chunk *roaring.Bitmap, isLast bool) error) error { 115 for bm.GetCardinality() > 0 { 116 if err := f(CutLeft(bm, sizeLimit), bm.GetCardinality() == 0); err != nil { 117 return err 118 } 119 } 120 return nil 121 } 122 123 func WalkChunkWithKeys(k []byte, m *roaring.Bitmap, sizeLimit uint64, f func(chunkKey []byte, chunk *roaring.Bitmap) error) error { 124 return WalkChunks(m, sizeLimit, func(chunk *roaring.Bitmap, isLast bool) error { 125 chunkKey := make([]byte, len(k)+4) 126 copy(chunkKey, k) 127 if isLast { 128 binary.BigEndian.PutUint32(chunkKey[len(k):], ^uint32(0)) 129 } else { 130 binary.BigEndian.PutUint32(chunkKey[len(k):], chunk.Maximum()) 131 } 132 return f(chunkKey, chunk) 133 }) 134 } 135 136 // TruncateRange - gets existing bitmap in db and call RemoveRange operator on it. 137 // starts from hot shard, stops when shard not overlap with [from-to) 138 // !Important: [from, to) 139 func TruncateRange(db kv.RwTx, bucket string, key []byte, to uint32) error { 140 chunkKey := make([]byte, len(key)+4) 141 copy(chunkKey, key) 142 binary.BigEndian.PutUint32(chunkKey[len(chunkKey)-4:], to) 143 bm, err := Get(db, bucket, key, to, MaxUint32) 144 if err != nil { 145 return err 146 } 147 148 if bm.GetCardinality() > 0 && to <= bm.Maximum() { 149 bm.RemoveRange(uint64(to), uint64(bm.Maximum())+1) 150 } 151 152 c, err := db.Cursor(bucket) 153 if err != nil { 154 return err 155 } 156 defer c.Close() 157 if err := Walk(c, chunkKey, 0, func(k, v []byte) (bool, error) { 158 if !bytes.HasPrefix(k, key) { 159 return false, nil 160 } 161 if err := db.Delete(bucket, k); err != nil { 162 return false, err 163 } 164 return true, nil 165 }); err != nil { 166 return err 167 } 168 169 buf := bytes.NewBuffer(nil) 170 return WalkChunkWithKeys(key, bm, ChunkLimit, func(chunkKey []byte, chunk *roaring.Bitmap) error { 171 buf.Reset() 172 if _, err := chunk.WriteTo(buf); err != nil { 173 return err 174 } 175 return db.Put(bucket, chunkKey, common.Copy(buf.Bytes())) 176 }) 177 } 178 179 // Get - reading as much chunks as needed to satisfy [from, to] condition 180 // join all chunks to 1 bitmap by Or operator 181 func Get(db kv.Tx, bucket string, key []byte, from, to uint32) (*roaring.Bitmap, error) { 182 var chunks []*roaring.Bitmap 183 184 fromKey := make([]byte, len(key)+4) 185 copy(fromKey, key) 186 binary.BigEndian.PutUint32(fromKey[len(fromKey)-4:], from) 187 c, err := db.Cursor(bucket) 188 if err != nil { 189 return nil, err 190 } 191 defer c.Close() 192 for k, v, err := c.Seek(fromKey); k != nil; k, v, err = c.Next() { 193 if err != nil { 194 return nil, err 195 } 196 if !bytes.HasPrefix(k, key) { 197 break 198 } 199 bm := NewBitmap() 200 defer ReturnToPool(bm) 201 if _, err := bm.ReadFrom(bytes.NewReader(v)); err != nil { 202 return nil, err 203 } 204 chunks = append(chunks, bm) 205 if binary.BigEndian.Uint32(k[len(k)-4:]) >= to { 206 break 207 } 208 } 209 if len(chunks) == 0 { 210 return roaring.New(), nil 211 } 212 return roaring.FastOr(chunks...), nil 213 } 214 215 // SeekInBitmap - returns value in bitmap which is >= n 216 // 217 //nolint:deadcode 218 func SeekInBitmap(m *roaring.Bitmap, n uint32) (found uint32, ok bool) { 219 i := m.Iterator() 220 i.AdvanceIfNeeded(n) 221 ok = i.HasNext() 222 if ok { 223 found = i.Next() 224 } 225 return found, ok 226 } 227 228 // CutLeft - cut from bitmap `targetSize` bytes from left 229 // removing lft part from `bm` 230 // returns nil on zero cardinality 231 func CutLeft64(bm *roaring64.Bitmap, sizeLimit uint64) *roaring64.Bitmap { 232 if bm.GetCardinality() == 0 { 233 return nil 234 } 235 236 sz := bm.GetSerializedSizeInBytes() 237 if sz <= sizeLimit { 238 lft := roaring64.New() 239 lft.AddRange(bm.Minimum(), bm.Maximum()+1) 240 lft.And(bm) 241 lft.RunOptimize() 242 bm.Clear() 243 return lft 244 } 245 246 from := bm.Minimum() 247 minMax := bm.Maximum() - bm.Minimum() 248 to := sort.Search(int(minMax), func(i int) bool { // can be optimized to avoid "too small steps", but let's leave it for readability 249 lft := roaring64.New() // bitmap.Clear() method intentionally not used here, because then serialized size of bitmap getting bigger 250 lft.AddRange(from, from+uint64(i)+1) 251 lft.And(bm) 252 lft.RunOptimize() 253 return lft.GetSerializedSizeInBytes() > sizeLimit 254 }) 255 256 lft := roaring64.New() 257 lft.AddRange(from, from+uint64(to)) // no +1 because sort.Search returns element which is just higher threshold - but we need lower 258 lft.And(bm) 259 bm.RemoveRange(from, from+uint64(to)) 260 lft.RunOptimize() 261 return lft 262 } 263 264 func WalkChunks64(bm *roaring64.Bitmap, sizeLimit uint64, f func(chunk *roaring64.Bitmap, isLast bool) error) error { 265 for bm.GetCardinality() > 0 { 266 if err := f(CutLeft64(bm, sizeLimit), bm.GetCardinality() == 0); err != nil { 267 return err 268 } 269 } 270 return nil 271 } 272 273 func WalkChunkWithKeys64(k []byte, m *roaring64.Bitmap, sizeLimit uint64, f func(chunkKey []byte, chunk *roaring64.Bitmap) error) error { 274 return WalkChunks64(m, sizeLimit, func(chunk *roaring64.Bitmap, isLast bool) error { 275 chunkKey := make([]byte, len(k)+8) 276 copy(chunkKey, k) 277 if isLast { 278 binary.BigEndian.PutUint64(chunkKey[len(k):], ^uint64(0)) 279 } else { 280 binary.BigEndian.PutUint64(chunkKey[len(k):], chunk.Maximum()) 281 } 282 return f(chunkKey, chunk) 283 }) 284 } 285 286 // TruncateRange - gets existing bitmap in db and call RemoveRange operator on it. 287 // starts from hot shard, stops when shard not overlap with [from-to) 288 // !Important: [from, to) 289 func TruncateRange64(db kv.RwTx, bucket string, key []byte, to uint64) error { 290 chunkKey := make([]byte, len(key)+8) 291 copy(chunkKey, key) 292 binary.BigEndian.PutUint64(chunkKey[len(chunkKey)-8:], to) 293 bm, err := Get64(db, bucket, key, to, math.MaxUint64) 294 if err != nil { 295 return err 296 } 297 298 if bm.GetCardinality() > 0 && to <= bm.Maximum() { 299 bm.RemoveRange(to, bm.Maximum()+1) 300 } 301 302 c, err := db.Cursor(bucket) 303 if err != nil { 304 return err 305 } 306 defer c.Close() 307 cDel, err := db.RwCursor(bucket) 308 if err != nil { 309 return err 310 } 311 defer cDel.Close() 312 if err := Walk(c, chunkKey, 0, func(k, v []byte) (bool, error) { 313 if !bytes.HasPrefix(k, key) { 314 return false, nil 315 } 316 if err := cDel.Delete(k); err != nil { 317 return false, err 318 } 319 return true, nil 320 }); err != nil { 321 return err 322 } 323 324 buf := bytes.NewBuffer(nil) 325 return WalkChunkWithKeys64(key, bm, ChunkLimit, func(chunkKey []byte, chunk *roaring64.Bitmap) error { 326 buf.Reset() 327 if _, err := chunk.WriteTo(buf); err != nil { 328 return err 329 } 330 return db.Put(bucket, chunkKey, common.Copy(buf.Bytes())) 331 }) 332 } 333 334 // Get - reading as much chunks as needed to satisfy [from, to] condition 335 // join all chunks to 1 bitmap by Or operator 336 func Get64(db kv.Tx, bucket string, key []byte, from, to uint64) (*roaring64.Bitmap, error) { 337 var chunks []*roaring64.Bitmap 338 339 fromKey := make([]byte, len(key)+8) 340 copy(fromKey, key) 341 binary.BigEndian.PutUint64(fromKey[len(fromKey)-8:], from) 342 343 c, err := db.Cursor(bucket) 344 if err != nil { 345 return nil, err 346 } 347 defer c.Close() 348 for k, v, err := c.Seek(fromKey); k != nil; k, v, err = c.Next() { 349 if err != nil { 350 return nil, err 351 } 352 if !bytes.HasPrefix(k, key) { 353 break 354 } 355 bm := NewBitmap64() 356 defer ReturnToPool64(bm) 357 _, err := bm.ReadFrom(bytes.NewReader(v)) 358 if err != nil { 359 return nil, err 360 } 361 chunks = append(chunks, bm) 362 if binary.BigEndian.Uint64(k[len(k)-8:]) >= to { 363 break 364 } 365 } 366 367 if len(chunks) == 0 { 368 return roaring64.New(), nil 369 } 370 return roaring64.FastOr(chunks...), nil 371 } 372 373 // SeekInBitmap - returns value in bitmap which is >= n 374 func SeekInBitmap64(m *roaring64.Bitmap, n uint64) (found uint64, ok bool) { 375 if m.IsEmpty() { 376 return 0, false 377 } 378 if n == 0 { 379 return m.Minimum(), true 380 } 381 searchRank := m.Rank(n - 1) 382 if searchRank >= m.GetCardinality() { 383 return 0, false 384 } 385 found, _ = m.Select(searchRank) 386 return found, true 387 } 388 389 func Walk(c kv.Cursor, startkey []byte, fixedbits int, walker func(k, v []byte) (bool, error)) error { 390 fixedbytes, mask := Bytesmask(fixedbits) 391 k, v, err := c.Seek(startkey) 392 if err != nil { 393 return err 394 } 395 for k != nil && len(k) >= fixedbytes && (fixedbits == 0 || bytes.Equal(k[:fixedbytes-1], startkey[:fixedbytes-1]) && (k[fixedbytes-1]&mask) == (startkey[fixedbytes-1]&mask)) { 396 goOn, err := walker(k, v) 397 if err != nil { 398 return err 399 } 400 if !goOn { 401 break 402 } 403 k, v, err = c.Next() 404 if err != nil { 405 return err 406 } 407 } 408 return nil 409 } 410 411 func Bytesmask(fixedbits int) (fixedbytes int, mask byte) { 412 fixedbytes = common.BitLenToByteLen(fixedbits) 413 shiftbits := fixedbits & 7 414 mask = byte(0xff) 415 if shiftbits != 0 { 416 mask = 0xff << (8 - shiftbits) 417 } 418 return fixedbytes, mask 419 } 420 421 type ToBitmap interface { 422 ToBitmap() (*roaring64.Bitmap, error) 423 } 424 425 func ToIter(it roaring64.IntIterable64) *ToIterInterface { return &ToIterInterface{it: it} } 426 427 type ToIterInterface struct{ it roaring64.IntIterable64 } 428 429 func (i *ToIterInterface) HasNext() bool { return i.it.HasNext() } 430 func (i *ToIterInterface) Next() (uint64, error) { return i.it.Next(), nil }