github.com/aigarnetwork/aigar@v0.0.0-20191115204914-d59a6eb70f8e/trie/sync_bloom.go (about)

     1  //  Copyright 2018 The go-ethereum Authors
     2  //  Copyright 2019 The go-aigar Authors
     3  //  This file is part of the go-aigar library.
     4  //
     5  //  The go-aigar library is free software: you can redistribute it and/or modify
     6  //  it under the terms of the GNU Lesser General Public License as published by
     7  //  the Free Software Foundation, either version 3 of the License, or
     8  //  (at your option) any later version.
     9  //
    10  //  The go-aigar library is distributed in the hope that it will be useful,
    11  //  but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    13  //  GNU Lesser General Public License for more details.
    14  //
    15  //  You should have received a copy of the GNU Lesser General Public License
    16  //  along with the go-aigar library. If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package trie
    19  
    20  import (
    21  	"encoding/binary"
    22  	"fmt"
    23  	"math"
    24  	"sync"
    25  	"sync/atomic"
    26  	"time"
    27  
    28  	"github.com/AigarNetwork/aigar/common"
    29  	"github.com/AigarNetwork/aigar/ethdb"
    30  	"github.com/AigarNetwork/aigar/log"
    31  	"github.com/AigarNetwork/aigar/metrics"
    32  	"github.com/steakknife/bloomfilter"
    33  )
    34  
// Metrics describing the usage and accuracy of the fast sync bloom filter.
var (
	// bloomAddMeter is marked once for every hash inserted through SyncBloom.Add.
	bloomAddMeter   = metrics.NewRegisteredMeter("trie/bloom/add", nil)
	// bloomLoadMeter is marked once for every database key pushed into the
	// bloom by the background initializer (SyncBloom.init).
	bloomLoadMeter  = metrics.NewRegisteredMeter("trie/bloom/load", nil)
	// bloomTestMeter is marked once for every SyncBloom.Contains query.
	bloomTestMeter  = metrics.NewRegisteredMeter("trie/bloom/test", nil)
	// bloomMissMeter is marked whenever an initialized bloom answers a
	// Contains query with "definitely not present".
	bloomMissMeter  = metrics.NewRegisteredMeter("trie/bloom/miss", nil)
	// bloomFaultMeter is not marked anywhere in this file.
	// NOTE(review): presumably updated by the trie sync code when the bloom
	// reports a possible hit that turns out to be missing on disk - confirm
	// against the callers.
	bloomFaultMeter = metrics.NewRegisteredMeter("trie/bloom/fault", nil)
	// bloomErrorGauge holds the bloom's estimated false positive rate, scaled
	// up by 100000 since the gauge only stores integers (see SyncBloom.meter).
	bloomErrorGauge = metrics.NewRegisteredGauge("trie/bloom/error", nil)
)
    43  
    44  // syncBloomHasher is a wrapper around a byte blob to satisfy the interface API
    45  // requirements of the bloom library used. It's used to convert a trie hash into
    46  // a 64 bit mini hash.
    47  type syncBloomHasher []byte
    48  
    49  func (f syncBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") }
    50  func (f syncBloomHasher) Sum(b []byte) []byte               { panic("not implemented") }
    51  func (f syncBloomHasher) Reset()                            { panic("not implemented") }
    52  func (f syncBloomHasher) BlockSize() int                    { panic("not implemented") }
    53  func (f syncBloomHasher) Size() int                         { return 8 }
    54  func (f syncBloomHasher) Sum64() uint64                     { return binary.BigEndian.Uint64(f) }
    55  
// SyncBloom is a bloom filter used during fast sync to quickly decide if a trie
// node already exists on disk or not. It self populates from the provided disk
// database on creation in a background goroutine and will only start returning
// live results once that's finished.
type SyncBloom struct {
	// bloom is the underlying filter; Close sets it to nil.
	bloom  *bloomfilter.Filter
	// inited is accessed atomically: set to 1 once init has finished walking
	// the database and reset to 0 by Close.
	inited uint32
	// closer makes sure the teardown in Close runs only once.
	closer sync.Once
	// closed is accessed atomically: set to 1 by Close to make the background
	// goroutines stop and to turn Add into a no-op.
	closed uint32
	// pend tracks the background init and meter goroutines so that Close can
	// wait for them to exit.
	pend   sync.WaitGroup
}
    67  
    68  // NewSyncBloom creates a new bloom filter of the given size (in megabytes) and
    69  // initializes it from the database. The bloom is hard coded to use 3 filters.
    70  func NewSyncBloom(memory uint64, database ethdb.Iteratee) *SyncBloom {
    71  	// Create the bloom filter to track known trie nodes
    72  	bloom, err := bloomfilter.New(memory*1024*1024*8, 3)
    73  	if err != nil {
    74  		panic(fmt.Sprintf("failed to create bloom: %v", err))
    75  	}
    76  	log.Info("Allocated fast sync bloom", "size", common.StorageSize(memory*1024*1024))
    77  
    78  	// Assemble the fast sync bloom and init it from previous sessions
    79  	b := &SyncBloom{
    80  		bloom: bloom,
    81  	}
    82  	b.pend.Add(2)
    83  	go func() {
    84  		defer b.pend.Done()
    85  		b.init(database)
    86  	}()
    87  	go func() {
    88  		defer b.pend.Done()
    89  		b.meter()
    90  	}()
    91  	return b
    92  }
    93  
    94  // init iterates over the database, pushing every trie hash into the bloom filter.
    95  func (b *SyncBloom) init(database ethdb.Iteratee) {
    96  	// Iterate over the database, but restart every now and again to avoid holding
    97  	// a persistent snapshot since fast sync can push a ton of data concurrently,
    98  	// bloating the disk.
    99  	//
   100  	// Note, this is fine, because everything inserted into leveldb by fast sync is
   101  	// also pushed into the bloom directly, so we're not missing anything when the
   102  	// iterator is swapped out for a new one.
   103  	it := database.NewIterator()
   104  
   105  	var (
   106  		start = time.Now()
   107  		swap  = time.Now()
   108  	)
   109  	for it.Next() && atomic.LoadUint32(&b.closed) == 0 {
   110  		// If the database entry is a trie node, add it to the bloom
   111  		if key := it.Key(); len(key) == common.HashLength {
   112  			b.bloom.Add(syncBloomHasher(key))
   113  			bloomLoadMeter.Mark(1)
   114  		}
   115  		// If enough time elapsed since the last iterator swap, restart
   116  		if time.Since(swap) > 8*time.Second {
   117  			key := common.CopyBytes(it.Key())
   118  
   119  			it.Release()
   120  			it = database.NewIteratorWithStart(key)
   121  
   122  			log.Info("Initializing fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", common.PrettyDuration(time.Since(start)))
   123  			swap = time.Now()
   124  		}
   125  	}
   126  	it.Release()
   127  
   128  	// Mark the bloom filter inited and return
   129  	log.Info("Initialized fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", common.PrettyDuration(time.Since(start)))
   130  	atomic.StoreUint32(&b.inited, 1)
   131  }
   132  
   133  // meter periodically recalculates the false positive error rate of the bloom
   134  // filter and reports it in a metric.
   135  func (b *SyncBloom) meter() {
   136  	for {
   137  		// Report the current error ration. No floats, lame, scale it up.
   138  		bloomErrorGauge.Update(int64(b.errorRate() * 100000))
   139  
   140  		// Wait one second, but check termination more frequently
   141  		for i := 0; i < 10; i++ {
   142  			if atomic.LoadUint32(&b.closed) == 1 {
   143  				return
   144  			}
   145  			time.Sleep(100 * time.Millisecond)
   146  		}
   147  	}
   148  }
   149  
   150  // Close terminates any background initializer still running and releases all the
   151  // memory allocated for the bloom.
   152  func (b *SyncBloom) Close() error {
   153  	b.closer.Do(func() {
   154  		// Ensure the initializer is stopped
   155  		atomic.StoreUint32(&b.closed, 1)
   156  		b.pend.Wait()
   157  
   158  		// Wipe the bloom, but mark it "uninited" just in case someone attempts an access
   159  		log.Info("Deallocated fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate())
   160  
   161  		atomic.StoreUint32(&b.inited, 0)
   162  		b.bloom = nil
   163  	})
   164  	return nil
   165  }
   166  
   167  // Add inserts a new trie node hash into the bloom filter.
   168  func (b *SyncBloom) Add(hash []byte) {
   169  	if atomic.LoadUint32(&b.closed) == 1 {
   170  		return
   171  	}
   172  	b.bloom.Add(syncBloomHasher(hash))
   173  	bloomAddMeter.Mark(1)
   174  }
   175  
   176  // Contains tests if the bloom filter contains the given hash:
   177  //   - false: the bloom definitely does not contain hash
   178  //   - true:  the bloom maybe contains hash
   179  //
   180  // While the bloom is being initialized, any query will return true.
   181  func (b *SyncBloom) Contains(hash []byte) bool {
   182  	bloomTestMeter.Mark(1)
   183  	if atomic.LoadUint32(&b.inited) == 0 {
   184  		// We didn't load all the trie nodes from the previous run of Geth yet. As
   185  		// such, we can't say for sure if a hash is not present for anything. Until
   186  		// the init is done, we're faking "possible presence" for everything.
   187  		return true
   188  	}
   189  	// Bloom initialized, check the real one and report any successful misses
   190  	maybe := b.bloom.Contains(syncBloomHasher(hash))
   191  	if !maybe {
   192  		bloomMissMeter.Mark(1)
   193  	}
   194  	return maybe
   195  }
   196  
   197  // errorRate calculates the probability of a random containment test returning a
   198  // false positive.
   199  //
   200  // We're calculating it ourselves because the bloom library we used missed a
   201  // parentheses in the formula and calculates it wrong. And it's discontinued...
   202  func (b *SyncBloom) errorRate() float64 {
   203  	k := float64(b.bloom.K())
   204  	n := float64(b.bloom.N())
   205  	m := float64(b.bloom.M())
   206  
   207  	return math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k)
   208  }