github.com/chain5j/chain5j-pkg@v1.0.7/collection/trees/tree/sync_bloom.go (about)

     1  // Copyright 2019 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package tree
    18  
    19  import (
    20  	"encoding/binary"
    21  	"fmt"
    22  	"math"
    23  	"sync"
    24  	"sync/atomic"
    25  	"time"
    26  
    27  	"github.com/chain5j/chain5j-pkg/database/kvstore"
    28  	"github.com/chain5j/chain5j-pkg/types"
    29  	"github.com/chain5j/chain5j-pkg/util/dateutil"
    30  	"github.com/chain5j/chain5j-pkg/util/hexutil"
    31  	"github.com/steakknife/bloomfilter"
    32  )
    33  
    34  // syncBloomHasher is a wrapper around a byte blob to satisfy the interface API
    35  // requirements of the bloom library used. It's used to convert a trie hash into
    36  // a 64 bit mini hash.
    37  type syncBloomHasher []byte
    38  
    39  func (f syncBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") }
    40  func (f syncBloomHasher) Sum(b []byte) []byte               { panic("not implemented") }
    41  func (f syncBloomHasher) Reset()                            { panic("not implemented") }
    42  func (f syncBloomHasher) BlockSize() int                    { panic("not implemented") }
    43  func (f syncBloomHasher) Size() int                         { return 8 }
    44  func (f syncBloomHasher) Sum64() uint64                     { return binary.BigEndian.Uint64(f) }
    45  
// SyncBloom is a bloom filter used during fast sync to quickly decide if a trie
// node already exists on disk or not. It self populates from the provided disk
// database on creation in a background thread and will only start returning live
// results once that's finished.
type SyncBloom struct {
	// bloom is the underlying filter; Close sets it to nil.
	bloom *bloomfilter.Filter
	// inited is set to 1 (atomically) once the background database scan is
	// done, and reset to 0 by Close. While it is 0, Contains answers true.
	inited uint32
	// closer guarantees the teardown in Close executes only once.
	closer sync.Once
	// closed is set to 1 (atomically) by Close; the background goroutines and
	// Add poll it to stop working.
	closed uint32
	// pend tracks the background goroutines started by NewSyncBloom so that
	// Close can wait for them to exit.
	pend sync.WaitGroup
}
    57  
    58  // NewSyncBloom creates a new bloom filter of the given size (in megabytes) and
    59  // initializes it from the database. The bloom is hard coded to use 3 filters.
    60  func NewSyncBloom(memory uint64, database kvstore.Iteratee) *SyncBloom {
    61  	// Create the bloom filter to track known trie nodes
    62  	bloom, err := bloomfilter.New(memory*1024*1024*8, 3)
    63  	if err != nil {
    64  		panic(fmt.Sprintf("failed to create bloom: %v", err))
    65  	}
    66  	logger().Info("Allocated fast sync bloom", "size", types.StorageSize(memory*1024*1024))
    67  
    68  	// Assemble the fast sync bloom and init it from previous sessions
    69  	b := &SyncBloom{
    70  		bloom: bloom,
    71  	}
    72  	b.pend.Add(2)
    73  	go func() {
    74  		defer b.pend.Done()
    75  		b.init(database)
    76  	}()
    77  	go func() {
    78  		defer b.pend.Done()
    79  		b.meter()
    80  	}()
    81  	return b
    82  }
    83  
    84  // init iterates over the database, pushing every trie hash into the bloom filter.
    85  func (b *SyncBloom) init(database kvstore.Iteratee) {
    86  	// Iterate over the database, but restart every now and again to avoid holding
    87  	// a persistent snapshot since fast sync can push a ton of data concurrently,
    88  	// bloating the disk.
    89  	//
    90  	// Note, this is fine, because everything inserted into leveldb by fast sync is
    91  	// also pushed into the bloom directly, so we're not missing anything when the
    92  	// iterator is swapped out for a new one.
    93  	it := database.NewIterator()
    94  
    95  	var (
    96  		start = time.Now()
    97  		swap  = time.Now()
    98  	)
    99  	for it.Next() && atomic.LoadUint32(&b.closed) == 0 {
   100  		// If the database entry is a trie node, add it to the bloom
   101  		if key := it.Key(); len(key) == types.HashLength {
   102  			b.bloom.Add(syncBloomHasher(key))
   103  		}
   104  		// If enough time elapsed since the last iterator swap, restart
   105  		if time.Since(swap) > 8*time.Second {
   106  			key := hexutil.CopyBytes(it.Key())
   107  
   108  			it.Release()
   109  			it = database.NewIteratorWithStart(key)
   110  
   111  			logger().Info("Initializing fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", dateutil.PrettyDuration(time.Since(start)))
   112  			swap = time.Now()
   113  		}
   114  	}
   115  	it.Release()
   116  
   117  	// Mark the bloom filter inited and return
   118  	logger().Info("Initialized fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", dateutil.PrettyDuration(time.Since(start)))
   119  	atomic.StoreUint32(&b.inited, 1)
   120  }
   121  
   122  // meter periodically recalculates the false positive error rate of the bloom
   123  // filter and reports it in a metric.
   124  func (b *SyncBloom) meter() {
   125  	for {
   126  		// Wait one second, but check termination more frequently
   127  		for i := 0; i < 10; i++ {
   128  			if atomic.LoadUint32(&b.closed) == 1 {
   129  				return
   130  			}
   131  			time.Sleep(100 * time.Millisecond)
   132  		}
   133  	}
   134  }
   135  
   136  // Close terminates any background initializer still running and releases all the
   137  // memory allocated for the bloom.
   138  func (b *SyncBloom) Close() error {
   139  	b.closer.Do(func() {
   140  		// Ensure the initializer is stopped
   141  		atomic.StoreUint32(&b.closed, 1)
   142  		b.pend.Wait()
   143  
   144  		// Wipe the bloom, but mark it "uninited" just in case someone attempts an access
   145  		logger().Info("Deallocated fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate())
   146  
   147  		atomic.StoreUint32(&b.inited, 0)
   148  		b.bloom = nil
   149  	})
   150  	return nil
   151  }
   152  
   153  // Add inserts a new trie node hash into the bloom filter.
   154  func (b *SyncBloom) Add(hash []byte) {
   155  	if atomic.LoadUint32(&b.closed) == 1 {
   156  		return
   157  	}
   158  	b.bloom.Add(syncBloomHasher(hash))
   159  }
   160  
   161  // Contains tests if the bloom filter contains the given hash:
   162  //   - false: the bloom definitely does not contain hash
   163  //   - true:  the bloom maybe contains hash
   164  //
   165  // While the bloom is being initialized, any query will return true.
   166  func (b *SyncBloom) Contains(hash []byte) bool {
   167  	if atomic.LoadUint32(&b.inited) == 0 {
   168  		// We didn't load all the trie nodes from the previous run of Geth yet. As
   169  		// such, we can't say for sure if a hash is not present for anything. Until
   170  		// the init is done, we're faking "possible presence" for everything.
   171  		return true
   172  	}
   173  	// Bloom initialized, check the real one and report any successful misses
   174  	maybe := b.bloom.Contains(syncBloomHasher(hash))
   175  	return maybe
   176  }
   177  
   178  // errorRate calculates the probability of a random containment test returning a
   179  // false positive.
   180  //
   181  // We're calculating it ourselves because the bloom library we used missed a
   182  // parentheses in the formula and calculates it wrong. And it's discontinued...
   183  func (b *SyncBloom) errorRate() float64 {
   184  	k := float64(b.bloom.K())
   185  	n := float64(b.bloom.N())
   186  	m := float64(b.bloom.M())
   187  
   188  	return math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k)
   189  }