github.com/vay007/go-ethereum@v1.9.7/trie/sync_bloom.go (about) 1 // Copyright 2019 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package trie 18 19 import ( 20 "encoding/binary" 21 "fmt" 22 "math" 23 "sync" 24 "sync/atomic" 25 "time" 26 27 "github.com/ethereum/go-ethereum/common" 28 "github.com/ethereum/go-ethereum/ethdb" 29 "github.com/ethereum/go-ethereum/log" 30 "github.com/ethereum/go-ethereum/metrics" 31 "github.com/steakknife/bloomfilter" 32 ) 33 34 var ( 35 bloomAddMeter = metrics.NewRegisteredMeter("trie/bloom/add", nil) 36 bloomLoadMeter = metrics.NewRegisteredMeter("trie/bloom/load", nil) 37 bloomTestMeter = metrics.NewRegisteredMeter("trie/bloom/test", nil) 38 bloomMissMeter = metrics.NewRegisteredMeter("trie/bloom/miss", nil) 39 bloomFaultMeter = metrics.NewRegisteredMeter("trie/bloom/fault", nil) 40 bloomErrorGauge = metrics.NewRegisteredGauge("trie/bloom/error", nil) 41 ) 42 43 // syncBloomHasher is a wrapper around a byte blob to satisfy the interface API 44 // requirements of the bloom library used. It's used to convert a trie hash into 45 // a 64 bit mini hash. 46 type syncBloomHasher []byte 47 48 func (f syncBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") } 49 func (f syncBloomHasher) Sum(b []byte) []byte { panic("not implemented") } 50 func (f syncBloomHasher) Reset() { panic("not implemented") } 51 func (f syncBloomHasher) BlockSize() int { panic("not implemented") } 52 func (f syncBloomHasher) Size() int { return 8 } 53 func (f syncBloomHasher) Sum64() uint64 { return binary.BigEndian.Uint64(f) } 54 55 // SyncBloom is a bloom filter used during fast sync to quickly decide if a trie 56 // node already exists on disk or not. It self populates from the provided disk 57 // database on creation in a background thread and will only start returning live 58 // results once that's finished. 59 type SyncBloom struct { 60 bloom *bloomfilter.Filter 61 inited uint32 62 closer sync.Once 63 closed uint32 64 pend sync.WaitGroup 65 } 66 67 // NewSyncBloom creates a new bloom filter of the given size (in megabytes) and 68 // initializes it from the database. The bloom is hard coded to use 3 filters. 69 func NewSyncBloom(memory uint64, database ethdb.Iteratee) *SyncBloom { 70 // Create the bloom filter to track known trie nodes 71 bloom, err := bloomfilter.New(memory*1024*1024*8, 3) 72 if err != nil { 73 panic(fmt.Sprintf("failed to create bloom: %v", err)) 74 } 75 log.Info("Allocated fast sync bloom", "size", common.StorageSize(memory*1024*1024)) 76 77 // Assemble the fast sync bloom and init it from previous sessions 78 b := &SyncBloom{ 79 bloom: bloom, 80 } 81 b.pend.Add(2) 82 go func() { 83 defer b.pend.Done() 84 b.init(database) 85 }() 86 go func() { 87 defer b.pend.Done() 88 b.meter() 89 }() 90 return b 91 } 92 93 // init iterates over the database, pushing every trie hash into the bloom filter. 94 func (b *SyncBloom) init(database ethdb.Iteratee) { 95 // Iterate over the database, but restart every now and again to avoid holding 96 // a persistent snapshot since fast sync can push a ton of data concurrently, 97 // bloating the disk. 98 // 99 // Note, this is fine, because everything inserted into leveldb by fast sync is 100 // also pushed into the bloom directly, so we're not missing anything when the 101 // iterator is swapped out for a new one. 102 it := database.NewIterator() 103 104 var ( 105 start = time.Now() 106 swap = time.Now() 107 ) 108 for it.Next() && atomic.LoadUint32(&b.closed) == 0 { 109 // If the database entry is a trie node, add it to the bloom 110 if key := it.Key(); len(key) == common.HashLength { 111 b.bloom.Add(syncBloomHasher(key)) 112 bloomLoadMeter.Mark(1) 113 } 114 // If enough time elapsed since the last iterator swap, restart 115 if time.Since(swap) > 8*time.Second { 116 key := common.CopyBytes(it.Key()) 117 118 it.Release() 119 it = database.NewIteratorWithStart(key) 120 121 log.Info("Initializing fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", common.PrettyDuration(time.Since(start))) 122 swap = time.Now() 123 } 124 } 125 it.Release() 126 127 // Mark the bloom filter inited and return 128 log.Info("Initialized fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", common.PrettyDuration(time.Since(start))) 129 atomic.StoreUint32(&b.inited, 1) 130 } 131 132 // meter periodically recalculates the false positive error rate of the bloom 133 // filter and reports it in a metric. 134 func (b *SyncBloom) meter() { 135 for { 136 // Report the current error ration. No floats, lame, scale it up. 137 bloomErrorGauge.Update(int64(b.errorRate() * 100000)) 138 139 // Wait one second, but check termination more frequently 140 for i := 0; i < 10; i++ { 141 if atomic.LoadUint32(&b.closed) == 1 { 142 return 143 } 144 time.Sleep(100 * time.Millisecond) 145 } 146 } 147 } 148 149 // Close terminates any background initializer still running and releases all the 150 // memory allocated for the bloom. 151 func (b *SyncBloom) Close() error { 152 b.closer.Do(func() { 153 // Ensure the initializer is stopped 154 atomic.StoreUint32(&b.closed, 1) 155 b.pend.Wait() 156 157 // Wipe the bloom, but mark it "uninited" just in case someone attempts an access 158 log.Info("Deallocated fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate()) 159 160 atomic.StoreUint32(&b.inited, 0) 161 b.bloom = nil 162 }) 163 return nil 164 } 165 166 // Add inserts a new trie node hash into the bloom filter. 167 func (b *SyncBloom) Add(hash []byte) { 168 if atomic.LoadUint32(&b.closed) == 1 { 169 return 170 } 171 b.bloom.Add(syncBloomHasher(hash)) 172 bloomAddMeter.Mark(1) 173 } 174 175 // Contains tests if the bloom filter contains the given hash: 176 // - false: the bloom definitely does not contain hash 177 // - true: the bloom maybe contains hash 178 // 179 // While the bloom is being initialized, any query will return true. 180 func (b *SyncBloom) Contains(hash []byte) bool { 181 bloomTestMeter.Mark(1) 182 if atomic.LoadUint32(&b.inited) == 0 { 183 // We didn't load all the trie nodes from the previous run of Geth yet. As 184 // such, we can't say for sure if a hash is not present for anything. Until 185 // the init is done, we're faking "possible presence" for everything. 186 return true 187 } 188 // Bloom initialized, check the real one and report any successful misses 189 maybe := b.bloom.Contains(syncBloomHasher(hash)) 190 if !maybe { 191 bloomMissMeter.Mark(1) 192 } 193 return maybe 194 } 195 196 // errorRate calculates the probability of a random containment test returning a 197 // false positive. 198 // 199 // We're calculating it ourselves because the bloom library we used missed a 200 // parentheses in the formula and calculates it wrong. And it's discontinued... 201 func (b *SyncBloom) errorRate() float64 { 202 k := float64(b.bloom.K()) 203 n := float64(b.bloom.N()) 204 m := float64(b.bloom.M()) 205 206 return math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k) 207 }