github.com/klaytn/klaytn@v1.12.1/storage/statedb/sync_bloom.go (about)

     1  // Copyright 2019 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  //
    17  // This file is derived from trie/sync_bloom.go (2020/05/20).
    18  // Modified and improved for the klaytn development.
    19  package statedb
    20  
    21  import (
    22  	"encoding/binary"
    23  	"fmt"
    24  	"math"
    25  	"sync"
    26  	"sync/atomic"
    27  	"time"
    28  
    29  	"github.com/klaytn/klaytn/common"
    30  	"github.com/klaytn/klaytn/log"
    31  	"github.com/klaytn/klaytn/storage/database"
    32  	"github.com/rcrowley/go-metrics"
    33  
    34  	"github.com/steakknife/bloomfilter"
    35  )
    36  
    37  var (
    38  	bloomAddMeter   = metrics.NewRegisteredMeter("trie/bloom/add", nil)
    39  	bloomLoadMeter  = metrics.NewRegisteredMeter("trie/bloom/load", nil)
    40  	bloomTestMeter  = metrics.NewRegisteredMeter("trie/bloom/test", nil)
    41  	bloomMissMeter  = metrics.NewRegisteredMeter("trie/bloom/miss", nil)
    42  	bloomFaultMeter = metrics.NewRegisteredMeter("trie/bloom/fault", nil)
    43  	bloomErrorGauge = metrics.NewRegisteredGauge("trie/bloom/error", nil)
    44  )
    45  
    46  // syncBloomHasher is a wrapper around a byte blob to satisfy the interface API
    47  // requirements of the bloom library used. It's used to convert a trie hash or
    48  // contract code hash into a 64 bit mini hash.
    49  type syncBloomHasher []byte
    50  
    51  func (f syncBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") }
    52  func (f syncBloomHasher) Sum(b []byte) []byte               { panic("not implemented") }
    53  func (f syncBloomHasher) Reset()                            { panic("not implemented") }
    54  func (f syncBloomHasher) BlockSize() int                    { panic("not implemented") }
    55  func (f syncBloomHasher) Size() int                         { return 8 }
    56  func (f syncBloomHasher) Sum64() uint64                     { return binary.BigEndian.Uint64(f) }
    57  
// SyncBloom is a bloom filter used during fast sync to quickly decide if a trie
// node or contract code already exists on disk or not. It self populates from the
// provided disk database on creation in a background thread and will only start
// returning live results once that's finished.
type SyncBloom struct {
	bloom  *bloomfilter.Filter // underlying filter; set to nil by Close
	inited uint32              // accessed atomically; 1 once init has finished, reset to 0 by Close
	closer sync.Once           // makes the teardown inside Close run only once
	closed uint32              // accessed atomically; set to 1 by Close to stop the background goroutines
	pend   sync.WaitGroup      // tracks the init and meter goroutines started by NewSyncBloom
}
    69  
    70  // NewSyncBloom creates a new bloom filter of the given size (in megabytes) and
    71  // initializes it from the database. The bloom is hard coded to use 3 filters.
    72  func NewSyncBloom(memory uint64, database database.Iteratee) *SyncBloom {
    73  	// Create the bloom filter to track known trie nodes
    74  	bloom, err := bloomfilter.New(memory*1024*1024*8, 3)
    75  	if err != nil {
    76  		panic(fmt.Sprintf("failed to create bloom: %v", err))
    77  	}
    78  	logger.Info("Allocated fast sync bloom", "size", common.StorageSize(memory*1024*1024))
    79  
    80  	// Assemble the fast sync bloom and init it from previous sessions
    81  	b := &SyncBloom{
    82  		bloom: bloom,
    83  	}
    84  	b.pend.Add(2)
    85  	go func() {
    86  		defer b.pend.Done()
    87  		b.init(database)
    88  	}()
    89  	go func() {
    90  		defer b.pend.Done()
    91  		b.meter()
    92  	}()
    93  	return b
    94  }
    95  
    96  // init iterates over the database, pushing every trie hash into the bloom filter.
    97  func (b *SyncBloom) init(db database.Iteratee) {
    98  	// Iterate over the database, but restart every now and again to avoid holding
    99  	// a persistent snapshot since fast sync can push a ton of data concurrently,
   100  	// bloating the disk.
   101  	//
   102  	// Note, this is fine, because everything inserted into leveldb by fast sync is
   103  	// also pushed into the bloom directly, so we're not missing anything when the
   104  	// iterator is swapped out for a new one.
   105  	it := db.NewIterator(nil, nil)
   106  
   107  	var (
   108  		start = time.Now()
   109  		swap  = time.Now()
   110  	)
   111  	for it.Next() && atomic.LoadUint32(&b.closed) == 0 {
   112  		// If the database entry is a trie node, add it to the bloom
   113  		key := it.Key()
   114  		if len(key) == common.HashLength {
   115  			b.bloom.Add(syncBloomHasher(key))
   116  			bloomLoadMeter.Mark(1)
   117  		}
   118  		// If the database entry is a contract code, add it to the bloom
   119  		if ok, hash := database.IsCodeKey(key); ok {
   120  			b.bloom.Add(syncBloomHasher(hash))
   121  			bloomLoadMeter.Mark(1)
   122  		}
   123  
   124  		// If enough time elapsed since the last iterator swap, restart
   125  		if time.Since(swap) > log.StatsReportLimit {
   126  			key := common.CopyBytes(it.Key())
   127  
   128  			it.Release()
   129  			it = db.NewIterator(nil, key)
   130  
   131  			logger.Info("Initializing fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", common.PrettyDuration(time.Since(start)))
   132  			swap = time.Now()
   133  		}
   134  	}
   135  	it.Release()
   136  
   137  	// Mark the bloom filter inited and return
   138  	logger.Info("Initialized fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", common.PrettyDuration(time.Since(start)))
   139  	atomic.StoreUint32(&b.inited, 1)
   140  }
   141  
   142  // meter periodically recalculates the false positive error rate of the bloom
   143  // filter and reports it in a metric.
   144  func (b *SyncBloom) meter() {
   145  	for {
   146  		// Report the current error ration. No floats, lame, scale it up.
   147  		bloomErrorGauge.Update(int64(b.errorRate() * 100000))
   148  
   149  		// Wait one second, but check termination more frequently
   150  		for i := 0; i < 10; i++ {
   151  			if atomic.LoadUint32(&b.closed) == 1 {
   152  				return
   153  			}
   154  			time.Sleep(100 * time.Millisecond)
   155  		}
   156  	}
   157  }
   158  
   159  // Close terminates any background initializer still running and releases all the
   160  // memory allocated for the bloom.
   161  func (b *SyncBloom) Close() error {
   162  	b.closer.Do(func() {
   163  		// Ensure the initializer is stopped
   164  		atomic.StoreUint32(&b.closed, 1)
   165  		b.pend.Wait()
   166  
   167  		// Wipe the bloom, but mark it "uninited" just in case someone attempts an access
   168  		logger.Info("Deallocated fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate())
   169  
   170  		atomic.StoreUint32(&b.inited, 0)
   171  		b.bloom = nil
   172  	})
   173  	return nil
   174  }
   175  
   176  // Add inserts a new trie node hash into the bloom filter.
   177  func (b *SyncBloom) Add(hash []byte) {
   178  	if atomic.LoadUint32(&b.closed) == 1 {
   179  		return
   180  	}
   181  	b.bloom.Add(syncBloomHasher(hash))
   182  	bloomAddMeter.Mark(1)
   183  }
   184  
   185  // Contains tests if the bloom filter contains the given hash:
   186  //   - false: the bloom definitely does not contain hash
   187  //   - true:  the bloom maybe contains hash
   188  //
   189  // While the bloom is being initialized, any query will return true.
   190  func (b *SyncBloom) Contains(hash []byte) bool {
   191  	bloomTestMeter.Mark(1)
   192  	if atomic.LoadUint32(&b.inited) == 0 {
   193  		// We didn't load all the trie nodes from the previous run of Geth yet. As
   194  		// such, we can't say for sure if a hash is not present for anything. Until
   195  		// the init is done, we're faking "possible presence" for everything.
   196  		return true
   197  	}
   198  	// Bloom initialized, check the real one and report any successful misses
   199  	maybe := b.bloom.Contains(syncBloomHasher(hash))
   200  	if !maybe {
   201  		bloomMissMeter.Mark(1)
   202  	}
   203  	return maybe
   204  }
   205  
   206  // errorRate calculates the probability of a random containment test returning a
   207  // false positive.
   208  //
   209  // We're calculating it ourselves because the bloom library we used missed a
   210  // parentheses in the formula and calculates it wrong. And it's discontinued...
   211  func (b *SyncBloom) errorRate() float64 {
   212  	k := float64(b.bloom.K())
   213  	n := float64(b.bloom.N())
   214  	m := float64(b.bloom.M())
   215  
   216  	return math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k)
   217  }