github.com/aidoskuneen/adk-node@v0.0.0-20220315131952-2e32567cb7f4/trie/sync_bloom.go (about)

     1  // Copyright 2021 The adkgo Authors
     2  // This file is part of the adkgo library (adapted for adkgo from go-ethereum v1.10.8).
     3  //
     4  // the adkgo library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // the adkgo library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the adkgo library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package trie
    18  
    19  import (
    20  	"encoding/binary"
    21  	"fmt"
    22  	"sync"
    23  	"sync/atomic"
    24  	"time"
    25  
    26  	"github.com/aidoskuneen/adk-node/common"
    27  	"github.com/aidoskuneen/adk-node/core/rawdb"
    28  	"github.com/aidoskuneen/adk-node/ethdb"
    29  	"github.com/aidoskuneen/adk-node/log"
    30  	"github.com/aidoskuneen/adk-node/metrics"
    31  	bloomfilter "github.com/holiman/bloomfilter/v2"
    32  )
    33  
// Metrics describing the activity of the sync bloom filter:
//   - bloomAddMeter is marked once for every hash inserted through Add.
//   - bloomLoadMeter is marked once for every database key that init feeds
//     into the bloom.
//   - bloomTestMeter is marked once for every Contains query.
//   - bloomMissMeter is marked when Contains reports a definite miss.
//   - bloomFaultMeter is not referenced in this file; presumably it is marked
//     by callers that detect a false positive (TODO confirm against usage).
//   - bloomErrorGauge holds the false positive probability scaled by 100000,
//     refreshed periodically by meter.
var (
	bloomAddMeter   = metrics.NewRegisteredMeter("trie/bloom/add", nil)
	bloomLoadMeter  = metrics.NewRegisteredMeter("trie/bloom/load", nil)
	bloomTestMeter  = metrics.NewRegisteredMeter("trie/bloom/test", nil)
	bloomMissMeter  = metrics.NewRegisteredMeter("trie/bloom/miss", nil)
	bloomFaultMeter = metrics.NewRegisteredMeter("trie/bloom/fault", nil)
	bloomErrorGauge = metrics.NewRegisteredGauge("trie/bloom/error", nil)
)
    42  
// SyncBloom is a bloom filter used during fast sync to quickly decide if a trie
// node or contract code already exists on disk or not. It self populates from the
// provided disk database on creation in a background thread and will only start
// returning live results once that's finished.
type SyncBloom struct {
	// bloom is the underlying filter; Close sets it to nil.
	bloom   *bloomfilter.Filter
	// inited is set to 1 (atomically) once init has finished iterating the
	// database and reset to 0 by Close. While it is 0, Contains answers true.
	inited  uint32
	// closer guarantees that the shutdown sequence in Close runs only once.
	closer  sync.Once
	// closed is set to 1 (atomically) by Close; init stops iterating and Add
	// becomes a no-op once it is set.
	closed  uint32
	// pend tracks the background goroutines started by NewSyncBloom so that
	// Close can wait for them to exit.
	pend    sync.WaitGroup
	// closeCh is closed by Close to tell the meter loop to return.
	closeCh chan struct{}
}
    55  
    56  // NewSyncBloom creates a new bloom filter of the given size (in megabytes) and
    57  // initializes it from the database. The bloom is hard coded to use 3 filters.
    58  func NewSyncBloom(memory uint64, database ethdb.Iteratee) *SyncBloom {
    59  	// Create the bloom filter to track known trie nodes
    60  	bloom, err := bloomfilter.New(memory*1024*1024*8, 4)
    61  	if err != nil {
    62  		panic(fmt.Sprintf("failed to create bloom: %v", err))
    63  	}
    64  	log.Info("Allocated fast sync bloom", "size", common.StorageSize(memory*1024*1024))
    65  
    66  	// Assemble the fast sync bloom and init it from previous sessions
    67  	b := &SyncBloom{
    68  		bloom:   bloom,
    69  		closeCh: make(chan struct{}),
    70  	}
    71  	b.pend.Add(2)
    72  	go func() {
    73  		defer b.pend.Done()
    74  		b.init(database)
    75  	}()
    76  	go func() {
    77  		defer b.pend.Done()
    78  		b.meter()
    79  	}()
    80  	return b
    81  }
    82  
    83  // init iterates over the database, pushing every trie hash into the bloom filter.
    84  func (b *SyncBloom) init(database ethdb.Iteratee) {
    85  	// Iterate over the database, but restart every now and again to avoid holding
    86  	// a persistent snapshot since fast sync can push a ton of data concurrently,
    87  	// bloating the disk.
    88  	//
    89  	// Note, this is fine, because everything inserted into leveldb by fast sync is
    90  	// also pushed into the bloom directly, so we're not missing anything when the
    91  	// iterator is swapped out for a new one.
    92  	it := database.NewIterator(nil, nil)
    93  
    94  	var (
    95  		start = time.Now()
    96  		swap  = time.Now()
    97  	)
    98  	for it.Next() && atomic.LoadUint32(&b.closed) == 0 {
    99  		// If the database entry is a trie node, add it to the bloom
   100  		key := it.Key()
   101  		if len(key) == common.HashLength {
   102  			b.bloom.AddHash(binary.BigEndian.Uint64(key))
   103  			bloomLoadMeter.Mark(1)
   104  		} else if ok, hash := rawdb.IsCodeKey(key); ok {
   105  			// If the database entry is a contract code, add it to the bloom
   106  			b.bloom.AddHash(binary.BigEndian.Uint64(hash))
   107  			bloomLoadMeter.Mark(1)
   108  		}
   109  		// If enough time elapsed since the last iterator swap, restart
   110  		if time.Since(swap) > 8*time.Second {
   111  			key := common.CopyBytes(it.Key())
   112  
   113  			it.Release()
   114  			it = database.NewIterator(nil, key)
   115  
   116  			log.Info("Initializing state bloom", "items", b.bloom.N(), "errorrate", b.bloom.FalsePosititveProbability(), "elapsed", common.PrettyDuration(time.Since(start)))
   117  			swap = time.Now()
   118  		}
   119  	}
   120  	it.Release()
   121  
   122  	// Mark the bloom filter inited and return
   123  	log.Info("Initialized state bloom", "items", b.bloom.N(), "errorrate", b.bloom.FalsePosititveProbability(), "elapsed", common.PrettyDuration(time.Since(start)))
   124  	atomic.StoreUint32(&b.inited, 1)
   125  }
   126  
   127  // meter periodically recalculates the false positive error rate of the bloom
   128  // filter and reports it in a metric.
   129  func (b *SyncBloom) meter() {
   130  	// check every second
   131  	tick := time.NewTicker(1 * time.Second)
   132  	for {
   133  		select {
   134  		case <-tick.C:
   135  			// Report the current error ration. No floats, lame, scale it up.
   136  			bloomErrorGauge.Update(int64(b.bloom.FalsePosititveProbability() * 100000))
   137  		case <-b.closeCh:
   138  			return
   139  		}
   140  	}
   141  }
   142  
   143  // Close terminates any background initializer still running and releases all the
   144  // memory allocated for the bloom.
   145  func (b *SyncBloom) Close() error {
   146  	b.closer.Do(func() {
   147  		// Ensure the initializer is stopped
   148  		atomic.StoreUint32(&b.closed, 1)
   149  		close(b.closeCh)
   150  		b.pend.Wait()
   151  
   152  		// Wipe the bloom, but mark it "uninited" just in case someone attempts an access
   153  		log.Info("Deallocated state bloom", "items", b.bloom.N(), "errorrate", b.bloom.FalsePosititveProbability())
   154  
   155  		atomic.StoreUint32(&b.inited, 0)
   156  		b.bloom = nil
   157  	})
   158  	return nil
   159  }
   160  
   161  // Add inserts a new trie node hash into the bloom filter.
   162  func (b *SyncBloom) Add(hash []byte) {
   163  	if atomic.LoadUint32(&b.closed) == 1 {
   164  		return
   165  	}
   166  	b.bloom.AddHash(binary.BigEndian.Uint64(hash))
   167  	bloomAddMeter.Mark(1)
   168  }
   169  
   170  // Contains tests if the bloom filter contains the given hash:
   171  //   - false: the bloom definitely does not contain hash
   172  //   - true:  the bloom maybe contains hash
   173  //
   174  // While the bloom is being initialized, any query will return true.
   175  func (b *SyncBloom) Contains(hash []byte) bool {
   176  	bloomTestMeter.Mark(1)
   177  	if atomic.LoadUint32(&b.inited) == 0 {
   178  		// We didn't load all the trie nodes from the previous run of Geth yet. As
   179  		// such, we can't say for sure if a hash is not present for anything. Until
   180  		// the init is done, we're faking "possible presence" for everything.
   181  		return true
   182  	}
   183  	// Bloom initialized, check the real one and report any successful misses
   184  	maybe := b.bloom.ContainsHash(binary.BigEndian.Uint64(hash))
   185  	if !maybe {
   186  		bloomMissMeter.Mark(1)
   187  	}
   188  	return maybe
   189  }