github.com/ledgerwatch/erigon-lib@v1.0.0/kv/bitmapdb/bitmapdb.go (about)

     1  /*
     2     Copyright 2022 The Erigon contributors
     3  
     4     Licensed under the Apache License, Version 2.0 (the "License");
     5     you may not use this file except in compliance with the License.
     6     You may obtain a copy of the License at
     7  
     8         http://www.apache.org/licenses/LICENSE-2.0
     9  
    10     Unless required by applicable law or agreed to in writing, software
    11     distributed under the License is distributed on an "AS IS" BASIS,
    12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13     See the License for the specific language governing permissions and
    14     limitations under the License.
    15  */
    16  
    17  package bitmapdb
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/binary"
    22  	"math"
    23  	"sort"
    24  	"sync"
    25  
    26  	"github.com/RoaringBitmap/roaring"
    27  	"github.com/RoaringBitmap/roaring/roaring64"
    28  	"github.com/c2h5oh/datasize"
    29  
    30  	"github.com/ledgerwatch/erigon-lib/common"
    31  	"github.com/ledgerwatch/erigon-lib/kv"
    32  )
    33  
    34  const MaxUint32 = 1<<32 - 1
    35  
// ToBitamp is implemented by values that can be converted to a 64-bit roaring bitmap.
//
// NOTE(review): the name looks like a misspelling of ToBitmap; an interface named
// ToBitmap with the same single method is declared further down in this file.
// The misspelled name is exported, so it is kept as is.
type ToBitamp interface {
	ToBitmap() (*roaring64.Bitmap, error)
}
    39  
// roaringPool recycles *roaring.Bitmap values between NewBitmap and ReturnToPool.
var roaringPool = sync.Pool{
	// New allocates a fresh bitmap when the pool has nothing to hand out.
	New: func() any {
		return roaring.New()
	},
}
    45  
    46  func NewBitmap() *roaring.Bitmap {
    47  	a := roaringPool.Get().(*roaring.Bitmap)
    48  	a.Clear()
    49  	return a
    50  }
    51  func ReturnToPool(a *roaring.Bitmap) {
    52  	if a == nil {
    53  		return
    54  	}
    55  	roaringPool.Put(a)
    56  }
    57  
// roaring64Pool recycles *roaring64.Bitmap values between NewBitmap64 and ReturnToPool64.
var roaring64Pool = sync.Pool{
	// New allocates a fresh bitmap when the pool has nothing to hand out.
	New: func() any {
		return roaring64.New()
	},
}
    63  
    64  func NewBitmap64() *roaring64.Bitmap {
    65  	a := roaring64Pool.Get().(*roaring64.Bitmap)
    66  	a.Clear()
    67  	return a
    68  }
    69  func ReturnToPool64(a *roaring64.Bitmap) {
    70  	if a == nil {
    71  		return
    72  	}
    73  	roaring64Pool.Put(a)
    74  }
    75  
// ChunkLimit is the size limit, in bytes, that this file passes to
// WalkChunkWithKeys / WalkChunkWithKeys64 when writing bitmap chunks.
// It is the threshold beyond which MDBX overflow pages appear: 4096 / 2 - (keySize + 8).
const ChunkLimit = uint64(1950 * datasize.B)
    77  
    78  // CutLeft - cut from bitmap `targetSize` bytes from left
    79  // removing lft part from `bm`
    80  // returns nil on zero cardinality
    81  func CutLeft(bm *roaring.Bitmap, sizeLimit uint64) *roaring.Bitmap {
    82  	if bm.GetCardinality() == 0 {
    83  		return nil
    84  	}
    85  
    86  	sz := bm.GetSerializedSizeInBytes()
    87  	if sz <= sizeLimit {
    88  		lft := roaring.New()
    89  		lft.AddRange(uint64(bm.Minimum()), uint64(bm.Maximum())+1)
    90  		lft.And(bm)
    91  		lft.RunOptimize()
    92  		bm.Clear()
    93  		return lft
    94  	}
    95  
    96  	from := uint64(bm.Minimum())
    97  	minMax := bm.Maximum() - bm.Minimum()
    98  	to := sort.Search(int(minMax), func(i int) bool { // can be optimized to avoid "too small steps", but let's leave it for readability
    99  		lft := roaring.New() // bitmap.Clear() method intentionally not used here, because then serialized size of bitmap getting bigger
   100  		lft.AddRange(from, from+uint64(i)+1)
   101  		lft.And(bm)
   102  		lft.RunOptimize()
   103  		return lft.GetSerializedSizeInBytes() > sizeLimit
   104  	})
   105  
   106  	lft := roaring.New()
   107  	lft.AddRange(from, from+uint64(to)) // no +1 because sort.Search returns element which is just higher threshold - but we need lower
   108  	lft.And(bm)
   109  	bm.RemoveRange(from, from+uint64(to))
   110  	lft.RunOptimize()
   111  	return lft
   112  }
   113  
   114  func WalkChunks(bm *roaring.Bitmap, sizeLimit uint64, f func(chunk *roaring.Bitmap, isLast bool) error) error {
   115  	for bm.GetCardinality() > 0 {
   116  		if err := f(CutLeft(bm, sizeLimit), bm.GetCardinality() == 0); err != nil {
   117  			return err
   118  		}
   119  	}
   120  	return nil
   121  }
   122  
   123  func WalkChunkWithKeys(k []byte, m *roaring.Bitmap, sizeLimit uint64, f func(chunkKey []byte, chunk *roaring.Bitmap) error) error {
   124  	return WalkChunks(m, sizeLimit, func(chunk *roaring.Bitmap, isLast bool) error {
   125  		chunkKey := make([]byte, len(k)+4)
   126  		copy(chunkKey, k)
   127  		if isLast {
   128  			binary.BigEndian.PutUint32(chunkKey[len(k):], ^uint32(0))
   129  		} else {
   130  			binary.BigEndian.PutUint32(chunkKey[len(k):], chunk.Maximum())
   131  		}
   132  		return f(chunkKey, chunk)
   133  	})
   134  }
   135  
   136  // TruncateRange - gets existing bitmap in db and call RemoveRange operator on it.
   137  // starts from hot shard, stops when shard not overlap with [from-to)
   138  // !Important: [from, to)
   139  func TruncateRange(db kv.RwTx, bucket string, key []byte, to uint32) error {
   140  	chunkKey := make([]byte, len(key)+4)
   141  	copy(chunkKey, key)
   142  	binary.BigEndian.PutUint32(chunkKey[len(chunkKey)-4:], to)
   143  	bm, err := Get(db, bucket, key, to, MaxUint32)
   144  	if err != nil {
   145  		return err
   146  	}
   147  
   148  	if bm.GetCardinality() > 0 && to <= bm.Maximum() {
   149  		bm.RemoveRange(uint64(to), uint64(bm.Maximum())+1)
   150  	}
   151  
   152  	c, err := db.Cursor(bucket)
   153  	if err != nil {
   154  		return err
   155  	}
   156  	defer c.Close()
   157  	if err := Walk(c, chunkKey, 0, func(k, v []byte) (bool, error) {
   158  		if !bytes.HasPrefix(k, key) {
   159  			return false, nil
   160  		}
   161  		if err := db.Delete(bucket, k); err != nil {
   162  			return false, err
   163  		}
   164  		return true, nil
   165  	}); err != nil {
   166  		return err
   167  	}
   168  
   169  	buf := bytes.NewBuffer(nil)
   170  	return WalkChunkWithKeys(key, bm, ChunkLimit, func(chunkKey []byte, chunk *roaring.Bitmap) error {
   171  		buf.Reset()
   172  		if _, err := chunk.WriteTo(buf); err != nil {
   173  			return err
   174  		}
   175  		return db.Put(bucket, chunkKey, common.Copy(buf.Bytes()))
   176  	})
   177  }
   178  
   179  // Get - reading as much chunks as needed to satisfy [from, to] condition
   180  // join all chunks to 1 bitmap by Or operator
   181  func Get(db kv.Tx, bucket string, key []byte, from, to uint32) (*roaring.Bitmap, error) {
   182  	var chunks []*roaring.Bitmap
   183  
   184  	fromKey := make([]byte, len(key)+4)
   185  	copy(fromKey, key)
   186  	binary.BigEndian.PutUint32(fromKey[len(fromKey)-4:], from)
   187  	c, err := db.Cursor(bucket)
   188  	if err != nil {
   189  		return nil, err
   190  	}
   191  	defer c.Close()
   192  	for k, v, err := c.Seek(fromKey); k != nil; k, v, err = c.Next() {
   193  		if err != nil {
   194  			return nil, err
   195  		}
   196  		if !bytes.HasPrefix(k, key) {
   197  			break
   198  		}
   199  		bm := NewBitmap()
   200  		defer ReturnToPool(bm)
   201  		if _, err := bm.ReadFrom(bytes.NewReader(v)); err != nil {
   202  			return nil, err
   203  		}
   204  		chunks = append(chunks, bm)
   205  		if binary.BigEndian.Uint32(k[len(k)-4:]) >= to {
   206  			break
   207  		}
   208  	}
   209  	if len(chunks) == 0 {
   210  		return roaring.New(), nil
   211  	}
   212  	return roaring.FastOr(chunks...), nil
   213  }
   214  
   215  // SeekInBitmap - returns value in bitmap which is >= n
   216  //
   217  //nolint:deadcode
   218  func SeekInBitmap(m *roaring.Bitmap, n uint32) (found uint32, ok bool) {
   219  	i := m.Iterator()
   220  	i.AdvanceIfNeeded(n)
   221  	ok = i.HasNext()
   222  	if ok {
   223  		found = i.Next()
   224  	}
   225  	return found, ok
   226  }
   227  
   228  // CutLeft - cut from bitmap `targetSize` bytes from left
   229  // removing lft part from `bm`
   230  // returns nil on zero cardinality
   231  func CutLeft64(bm *roaring64.Bitmap, sizeLimit uint64) *roaring64.Bitmap {
   232  	if bm.GetCardinality() == 0 {
   233  		return nil
   234  	}
   235  
   236  	sz := bm.GetSerializedSizeInBytes()
   237  	if sz <= sizeLimit {
   238  		lft := roaring64.New()
   239  		lft.AddRange(bm.Minimum(), bm.Maximum()+1)
   240  		lft.And(bm)
   241  		lft.RunOptimize()
   242  		bm.Clear()
   243  		return lft
   244  	}
   245  
   246  	from := bm.Minimum()
   247  	minMax := bm.Maximum() - bm.Minimum()
   248  	to := sort.Search(int(minMax), func(i int) bool { // can be optimized to avoid "too small steps", but let's leave it for readability
   249  		lft := roaring64.New() // bitmap.Clear() method intentionally not used here, because then serialized size of bitmap getting bigger
   250  		lft.AddRange(from, from+uint64(i)+1)
   251  		lft.And(bm)
   252  		lft.RunOptimize()
   253  		return lft.GetSerializedSizeInBytes() > sizeLimit
   254  	})
   255  
   256  	lft := roaring64.New()
   257  	lft.AddRange(from, from+uint64(to)) // no +1 because sort.Search returns element which is just higher threshold - but we need lower
   258  	lft.And(bm)
   259  	bm.RemoveRange(from, from+uint64(to))
   260  	lft.RunOptimize()
   261  	return lft
   262  }
   263  
   264  func WalkChunks64(bm *roaring64.Bitmap, sizeLimit uint64, f func(chunk *roaring64.Bitmap, isLast bool) error) error {
   265  	for bm.GetCardinality() > 0 {
   266  		if err := f(CutLeft64(bm, sizeLimit), bm.GetCardinality() == 0); err != nil {
   267  			return err
   268  		}
   269  	}
   270  	return nil
   271  }
   272  
   273  func WalkChunkWithKeys64(k []byte, m *roaring64.Bitmap, sizeLimit uint64, f func(chunkKey []byte, chunk *roaring64.Bitmap) error) error {
   274  	return WalkChunks64(m, sizeLimit, func(chunk *roaring64.Bitmap, isLast bool) error {
   275  		chunkKey := make([]byte, len(k)+8)
   276  		copy(chunkKey, k)
   277  		if isLast {
   278  			binary.BigEndian.PutUint64(chunkKey[len(k):], ^uint64(0))
   279  		} else {
   280  			binary.BigEndian.PutUint64(chunkKey[len(k):], chunk.Maximum())
   281  		}
   282  		return f(chunkKey, chunk)
   283  	})
   284  }
   285  
   286  // TruncateRange - gets existing bitmap in db and call RemoveRange operator on it.
   287  // starts from hot shard, stops when shard not overlap with [from-to)
   288  // !Important: [from, to)
   289  func TruncateRange64(db kv.RwTx, bucket string, key []byte, to uint64) error {
   290  	chunkKey := make([]byte, len(key)+8)
   291  	copy(chunkKey, key)
   292  	binary.BigEndian.PutUint64(chunkKey[len(chunkKey)-8:], to)
   293  	bm, err := Get64(db, bucket, key, to, math.MaxUint64)
   294  	if err != nil {
   295  		return err
   296  	}
   297  
   298  	if bm.GetCardinality() > 0 && to <= bm.Maximum() {
   299  		bm.RemoveRange(to, bm.Maximum()+1)
   300  	}
   301  
   302  	c, err := db.Cursor(bucket)
   303  	if err != nil {
   304  		return err
   305  	}
   306  	defer c.Close()
   307  	cDel, err := db.RwCursor(bucket)
   308  	if err != nil {
   309  		return err
   310  	}
   311  	defer cDel.Close()
   312  	if err := Walk(c, chunkKey, 0, func(k, v []byte) (bool, error) {
   313  		if !bytes.HasPrefix(k, key) {
   314  			return false, nil
   315  		}
   316  		if err := cDel.Delete(k); err != nil {
   317  			return false, err
   318  		}
   319  		return true, nil
   320  	}); err != nil {
   321  		return err
   322  	}
   323  
   324  	buf := bytes.NewBuffer(nil)
   325  	return WalkChunkWithKeys64(key, bm, ChunkLimit, func(chunkKey []byte, chunk *roaring64.Bitmap) error {
   326  		buf.Reset()
   327  		if _, err := chunk.WriteTo(buf); err != nil {
   328  			return err
   329  		}
   330  		return db.Put(bucket, chunkKey, common.Copy(buf.Bytes()))
   331  	})
   332  }
   333  
   334  // Get - reading as much chunks as needed to satisfy [from, to] condition
   335  // join all chunks to 1 bitmap by Or operator
   336  func Get64(db kv.Tx, bucket string, key []byte, from, to uint64) (*roaring64.Bitmap, error) {
   337  	var chunks []*roaring64.Bitmap
   338  
   339  	fromKey := make([]byte, len(key)+8)
   340  	copy(fromKey, key)
   341  	binary.BigEndian.PutUint64(fromKey[len(fromKey)-8:], from)
   342  
   343  	c, err := db.Cursor(bucket)
   344  	if err != nil {
   345  		return nil, err
   346  	}
   347  	defer c.Close()
   348  	for k, v, err := c.Seek(fromKey); k != nil; k, v, err = c.Next() {
   349  		if err != nil {
   350  			return nil, err
   351  		}
   352  		if !bytes.HasPrefix(k, key) {
   353  			break
   354  		}
   355  		bm := NewBitmap64()
   356  		defer ReturnToPool64(bm)
   357  		_, err := bm.ReadFrom(bytes.NewReader(v))
   358  		if err != nil {
   359  			return nil, err
   360  		}
   361  		chunks = append(chunks, bm)
   362  		if binary.BigEndian.Uint64(k[len(k)-8:]) >= to {
   363  			break
   364  		}
   365  	}
   366  
   367  	if len(chunks) == 0 {
   368  		return roaring64.New(), nil
   369  	}
   370  	return roaring64.FastOr(chunks...), nil
   371  }
   372  
   373  // SeekInBitmap - returns value in bitmap which is >= n
   374  func SeekInBitmap64(m *roaring64.Bitmap, n uint64) (found uint64, ok bool) {
   375  	if m.IsEmpty() {
   376  		return 0, false
   377  	}
   378  	if n == 0 {
   379  		return m.Minimum(), true
   380  	}
   381  	searchRank := m.Rank(n - 1)
   382  	if searchRank >= m.GetCardinality() {
   383  		return 0, false
   384  	}
   385  	found, _ = m.Select(searchRank)
   386  	return found, true
   387  }
   388  
   389  func Walk(c kv.Cursor, startkey []byte, fixedbits int, walker func(k, v []byte) (bool, error)) error {
   390  	fixedbytes, mask := Bytesmask(fixedbits)
   391  	k, v, err := c.Seek(startkey)
   392  	if err != nil {
   393  		return err
   394  	}
   395  	for k != nil && len(k) >= fixedbytes && (fixedbits == 0 || bytes.Equal(k[:fixedbytes-1], startkey[:fixedbytes-1]) && (k[fixedbytes-1]&mask) == (startkey[fixedbytes-1]&mask)) {
   396  		goOn, err := walker(k, v)
   397  		if err != nil {
   398  			return err
   399  		}
   400  		if !goOn {
   401  			break
   402  		}
   403  		k, v, err = c.Next()
   404  		if err != nil {
   405  			return err
   406  		}
   407  	}
   408  	return nil
   409  }
   410  
   411  func Bytesmask(fixedbits int) (fixedbytes int, mask byte) {
   412  	fixedbytes = common.BitLenToByteLen(fixedbits)
   413  	shiftbits := fixedbits & 7
   414  	mask = byte(0xff)
   415  	if shiftbits != 0 {
   416  		mask = 0xff << (8 - shiftbits)
   417  	}
   418  	return fixedbytes, mask
   419  }
   420  
// ToBitmap is implemented by values that can be converted to a 64-bit roaring bitmap.
type ToBitmap interface {
	// ToBitmap returns the bitmap form of the value, or an error.
	ToBitmap() (*roaring64.Bitmap, error)
}
   424  
   425  func ToIter(it roaring64.IntIterable64) *ToIterInterface { return &ToIterInterface{it: it} }
   426  
   427  type ToIterInterface struct{ it roaring64.IntIterable64 }
   428  
   429  func (i *ToIterInterface) HasNext() bool         { return i.it.HasNext() }
   430  func (i *ToIterInterface) Next() (uint64, error) { return i.it.Next(), nil }