github.com/amazechain/amc@v0.1.3/modules/ethdb/bitmapdb/dbutils.go (about)

     1  // Copyright 2023 The AmazeChain Authors
     2  // This file is part of the AmazeChain library.
     3  //
     4  // The AmazeChain library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The AmazeChain library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the AmazeChain library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package bitmapdb
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/binary"
    22  	"github.com/amazechain/amc/modules/ethdb"
    23  	"math"
    24  	"sort"
    25  	"sync"
    26  
    27  	"github.com/RoaringBitmap/roaring"
    28  	"github.com/RoaringBitmap/roaring/roaring64"
    29  	"github.com/c2h5oh/datasize"
    30  	libcommon "github.com/ledgerwatch/erigon-lib/common"
    31  	"github.com/ledgerwatch/erigon-lib/kv"
    32  )
    33  
    34  var roaringPool = sync.Pool{
    35  	New: func() any {
    36  		return roaring.New()
    37  	},
    38  }
    39  
    40  func NewBitmap() *roaring.Bitmap {
    41  	a := roaringPool.Get().(*roaring.Bitmap)
    42  	a.Clear()
    43  	return a
    44  }
    45  func ReturnToPool(a *roaring.Bitmap) {
    46  	roaringPool.Put(a)
    47  }
    48  
    49  var roaring64Pool = sync.Pool{
    50  	New: func() any {
    51  		return roaring64.New()
    52  	},
    53  }
    54  
    55  func NewBitmap64() *roaring64.Bitmap {
    56  	a := roaring64Pool.Get().(*roaring64.Bitmap)
    57  	a.Clear()
    58  	return a
    59  }
    60  func ReturnToPool64(a *roaring64.Bitmap) {
    61  	roaring64Pool.Put(a)
    62  }
    63  
    64  const ChunkLimit = uint64(1950 * datasize.B) // threshold beyond which MDBX overflow pages appear: 4096 / 2 - (keySize + 8)
    65  
    66  // CutLeft - cut from bitmap `targetSize` bytes from left
    67  // removing lft part from `bm`
    68  // returns nil on zero cardinality
    69  func CutLeft(bm *roaring.Bitmap, sizeLimit uint64) *roaring.Bitmap {
    70  	if bm.GetCardinality() == 0 {
    71  		return nil
    72  	}
    73  
    74  	sz := bm.GetSerializedSizeInBytes()
    75  	if sz <= sizeLimit {
    76  		lft := roaring.New()
    77  		lft.AddRange(uint64(bm.Minimum()), uint64(bm.Maximum())+1)
    78  		lft.And(bm)
    79  		lft.RunOptimize()
    80  		bm.Clear()
    81  		return lft
    82  	}
    83  
    84  	from := uint64(bm.Minimum())
    85  	minMax := bm.Maximum() - bm.Minimum()
    86  	to := sort.Search(int(minMax), func(i int) bool { // can be optimized to avoid "too small steps", but let's leave it for readability
    87  		lft := roaring.New() // bitmap.Clear() method intentionally not used here, because then serialized size of bitmap getting bigger
    88  		lft.AddRange(from, from+uint64(i)+1)
    89  		lft.And(bm)
    90  		lft.RunOptimize()
    91  		return lft.GetSerializedSizeInBytes() > sizeLimit
    92  	})
    93  
    94  	lft := roaring.New()
    95  	lft.AddRange(from, from+uint64(to)) // no +1 because sort.Search returns element which is just higher threshold - but we need lower
    96  	lft.And(bm)
    97  	bm.RemoveRange(from, from+uint64(to))
    98  	lft.RunOptimize()
    99  	return lft
   100  }
   101  
   102  func WalkChunks(bm *roaring.Bitmap, sizeLimit uint64, f func(chunk *roaring.Bitmap, isLast bool) error) error {
   103  	for bm.GetCardinality() > 0 {
   104  		if err := f(CutLeft(bm, sizeLimit), bm.GetCardinality() == 0); err != nil {
   105  			return err
   106  		}
   107  	}
   108  	return nil
   109  }
   110  
   111  func WalkChunkWithKeys(k []byte, m *roaring.Bitmap, sizeLimit uint64, f func(chunkKey []byte, chunk *roaring.Bitmap) error) error {
   112  	return WalkChunks(m, sizeLimit, func(chunk *roaring.Bitmap, isLast bool) error {
   113  		chunkKey := make([]byte, len(k)+4)
   114  		copy(chunkKey, k)
   115  		if isLast {
   116  			binary.BigEndian.PutUint32(chunkKey[len(k):], ^uint32(0))
   117  		} else {
   118  			binary.BigEndian.PutUint32(chunkKey[len(k):], chunk.Maximum())
   119  		}
   120  		return f(chunkKey, chunk)
   121  	})
   122  }
   123  
   124  // TruncateRange - gets existing bitmap in db and call RemoveRange operator on it.
   125  // starts from hot shard, stops when shard not overlap with [from-to)
   126  // !Important: [from, to)
   127  func TruncateRange(db kv.RwTx, bucket string, key []byte, to uint32) error {
   128  	chunkKey := make([]byte, len(key)+4)
   129  	copy(chunkKey, key)
   130  	binary.BigEndian.PutUint32(chunkKey[len(chunkKey)-4:], to)
   131  	bm, err := Get(db, bucket, key, to, math.MaxUint32)
   132  	if err != nil {
   133  		return err
   134  	}
   135  
   136  	if bm.GetCardinality() > 0 && to <= bm.Maximum() {
   137  		bm.RemoveRange(uint64(to), uint64(bm.Maximum())+1)
   138  	}
   139  
   140  	c, err := db.Cursor(bucket)
   141  	if err != nil {
   142  		return err
   143  	}
   144  	defer c.Close()
   145  	if err := ethdb.Walk(c, chunkKey, 0, func(k, v []byte) (bool, error) {
   146  		if !bytes.HasPrefix(k, key) {
   147  			return false, nil
   148  		}
   149  		if err := db.Delete(bucket, k); err != nil {
   150  			return false, err
   151  		}
   152  		return true, nil
   153  	}); err != nil {
   154  		return err
   155  	}
   156  
   157  	buf := bytes.NewBuffer(nil)
   158  	return WalkChunkWithKeys(key, bm, ChunkLimit, func(chunkKey []byte, chunk *roaring.Bitmap) error {
   159  		buf.Reset()
   160  		if _, err := chunk.WriteTo(buf); err != nil {
   161  			return err
   162  		}
   163  		return db.Put(bucket, chunkKey, libcommon.Copy(buf.Bytes()))
   164  	})
   165  }
   166  
   167  // Get - reading as much chunks as needed to satisfy [from, to] condition
   168  // join all chunks to 1 bitmap by Or operator
   169  func Get(db kv.Tx, bucket string, key []byte, from, to uint32) (*roaring.Bitmap, error) {
   170  	var chunks []*roaring.Bitmap
   171  
   172  	fromKey := make([]byte, len(key)+4)
   173  	copy(fromKey, key)
   174  	binary.BigEndian.PutUint32(fromKey[len(fromKey)-4:], from)
   175  	c, err := db.Cursor(bucket)
   176  	if err != nil {
   177  		return nil, err
   178  	}
   179  	defer c.Close()
   180  	for k, v, err := c.Seek(fromKey); k != nil; k, v, err = c.Next() {
   181  		if err != nil {
   182  			return nil, err
   183  		}
   184  		if !bytes.HasPrefix(k, key) {
   185  			break
   186  		}
   187  		bm := NewBitmap()
   188  		defer ReturnToPool(bm)
   189  		if _, err := bm.ReadFrom(bytes.NewReader(v)); err != nil {
   190  			return nil, err
   191  		}
   192  		chunks = append(chunks, bm)
   193  		if binary.BigEndian.Uint32(k[len(k)-4:]) >= to {
   194  			break
   195  		}
   196  	}
   197  	if len(chunks) == 0 {
   198  		return roaring.New(), nil
   199  	}
   200  	return roaring.FastOr(chunks...), nil
   201  }
   202  
   203  // SeekInBitmap - returns value in bitmap which is >= n
   204  //
   205  //nolint:deadcode
   206  func SeekInBitmap(m *roaring.Bitmap, n uint32) (found uint32, ok bool) {
   207  	i := m.Iterator()
   208  	i.AdvanceIfNeeded(n)
   209  	ok = i.HasNext()
   210  	if ok {
   211  		found = i.Next()
   212  	}
   213  	return found, ok
   214  }
   215  
   216  // CutLeft - cut from bitmap `targetSize` bytes from left
   217  // removing lft part from `bm`
   218  // returns nil on zero cardinality
   219  func CutLeft64(bm *roaring64.Bitmap, sizeLimit uint64) *roaring64.Bitmap {
   220  	if bm.GetCardinality() == 0 {
   221  		return nil
   222  	}
   223  
   224  	sz := bm.GetSerializedSizeInBytes()
   225  	if sz <= sizeLimit {
   226  		lft := roaring64.New()
   227  		lft.AddRange(bm.Minimum(), bm.Maximum()+1)
   228  		lft.And(bm)
   229  		lft.RunOptimize()
   230  		bm.Clear()
   231  		return lft
   232  	}
   233  
   234  	from := bm.Minimum()
   235  	minMax := bm.Maximum() - bm.Minimum()
   236  	to := sort.Search(int(minMax), func(i int) bool { // can be optimized to avoid "too small steps", but let's leave it for readability
   237  		lft := roaring64.New() // bitmap.Clear() method intentionally not used here, because then serialized size of bitmap getting bigger
   238  		lft.AddRange(from, from+uint64(i)+1)
   239  		lft.And(bm)
   240  		lft.RunOptimize()
   241  		return lft.GetSerializedSizeInBytes() > sizeLimit
   242  	})
   243  
   244  	lft := roaring64.New()
   245  	lft.AddRange(from, from+uint64(to)) // no +1 because sort.Search returns element which is just higher threshold - but we need lower
   246  	lft.And(bm)
   247  	bm.RemoveRange(from, from+uint64(to))
   248  	lft.RunOptimize()
   249  	return lft
   250  }
   251  
   252  func WalkChunks64(bm *roaring64.Bitmap, sizeLimit uint64, f func(chunk *roaring64.Bitmap, isLast bool) error) error {
   253  	for bm.GetCardinality() > 0 {
   254  		if err := f(CutLeft64(bm, sizeLimit), bm.GetCardinality() == 0); err != nil {
   255  			return err
   256  		}
   257  	}
   258  	return nil
   259  }
   260  
   261  func WalkChunkWithKeys64(k []byte, m *roaring64.Bitmap, sizeLimit uint64, f func(chunkKey []byte, chunk *roaring64.Bitmap) error) error {
   262  	return WalkChunks64(m, sizeLimit, func(chunk *roaring64.Bitmap, isLast bool) error {
   263  		chunkKey := make([]byte, len(k)+8)
   264  		copy(chunkKey, k)
   265  		if isLast {
   266  			binary.BigEndian.PutUint64(chunkKey[len(k):], ^uint64(0))
   267  		} else {
   268  			binary.BigEndian.PutUint64(chunkKey[len(k):], chunk.Maximum())
   269  		}
   270  		return f(chunkKey, chunk)
   271  	})
   272  }
   273  
   274  // TruncateRange - gets existing bitmap in db and call RemoveRange operator on it.
   275  // starts from hot shard, stops when shard not overlap with [from-to)
   276  // !Important: [from, to)
   277  func TruncateRange64(db kv.RwTx, bucket string, key []byte, to uint64) error {
   278  	chunkKey := make([]byte, len(key)+8)
   279  	copy(chunkKey, key)
   280  	binary.BigEndian.PutUint64(chunkKey[len(chunkKey)-8:], to)
   281  	bm, err := Get64(db, bucket, key, to, math.MaxUint64)
   282  	if err != nil {
   283  		return err
   284  	}
   285  
   286  	if bm.GetCardinality() > 0 && to <= bm.Maximum() {
   287  		bm.RemoveRange(to, bm.Maximum()+1)
   288  	}
   289  
   290  	c, err := db.Cursor(bucket)
   291  	if err != nil {
   292  		return err
   293  	}
   294  	defer c.Close()
   295  	cDel, err := db.RwCursor(bucket)
   296  	if err != nil {
   297  		return err
   298  	}
   299  	defer cDel.Close()
   300  	if err := ethdb.Walk(c, chunkKey, 0, func(k, v []byte) (bool, error) {
   301  		if !bytes.HasPrefix(k, key) {
   302  			return false, nil
   303  		}
   304  		if err := cDel.Delete(k); err != nil {
   305  			return false, err
   306  		}
   307  		return true, nil
   308  	}); err != nil {
   309  		return err
   310  	}
   311  
   312  	buf := bytes.NewBuffer(nil)
   313  	return WalkChunkWithKeys64(key, bm, ChunkLimit, func(chunkKey []byte, chunk *roaring64.Bitmap) error {
   314  		buf.Reset()
   315  		if _, err := chunk.WriteTo(buf); err != nil {
   316  			return err
   317  		}
   318  		return db.Put(bucket, chunkKey, libcommon.Copy(buf.Bytes()))
   319  	})
   320  }
   321  
   322  // Get - reading as much chunks as needed to satisfy [from, to] condition
   323  // join all chunks to 1 bitmap by Or operator
   324  func Get64(db kv.Tx, bucket string, key []byte, from, to uint64) (*roaring64.Bitmap, error) {
   325  	var chunks []*roaring64.Bitmap
   326  
   327  	fromKey := make([]byte, len(key)+8)
   328  	copy(fromKey, key)
   329  	binary.BigEndian.PutUint64(fromKey[len(fromKey)-8:], from)
   330  
   331  	c, err := db.Cursor(bucket)
   332  	if err != nil {
   333  		return nil, err
   334  	}
   335  	defer c.Close()
   336  	for k, v, err := c.Seek(fromKey); k != nil; k, v, err = c.Next() {
   337  		if err != nil {
   338  			return nil, err
   339  		}
   340  		if !bytes.HasPrefix(k, key) {
   341  			break
   342  		}
   343  		bm := NewBitmap64()
   344  		defer ReturnToPool64(bm)
   345  		_, err := bm.ReadFrom(bytes.NewReader(v))
   346  		if err != nil {
   347  			return nil, err
   348  		}
   349  		chunks = append(chunks, bm)
   350  		if binary.BigEndian.Uint64(k[len(k)-8:]) >= to {
   351  			break
   352  		}
   353  	}
   354  
   355  	if len(chunks) == 0 {
   356  		return roaring64.New(), nil
   357  	}
   358  	return roaring64.FastOr(chunks...), nil
   359  }
   360  
   361  // SeekInBitmap - returns value in bitmap which is >= n
   362  func SeekInBitmap64(m *roaring64.Bitmap, n uint64) (found uint64, ok bool) {
   363  	if m.IsEmpty() {
   364  		return 0, false
   365  	}
   366  	if n == 0 {
   367  		return m.Minimum(), true
   368  	}
   369  	searchRank := m.Rank(n - 1)
   370  	if searchRank >= m.GetCardinality() {
   371  		return 0, false
   372  	}
   373  	found, _ = m.Select(searchRank)
   374  	return found, true
   375  }