github.com/aswedchain/aswed@v1.0.1/trie/sync_bloom.go (about) 1 // Copyright 2019 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package trie 18 19 import ( 20 "encoding/binary" 21 "fmt" 22 "math" 23 "sync" 24 "sync/atomic" 25 "time" 26 27 "github.com/aswedchain/aswed/common" 28 "github.com/aswedchain/aswed/core/rawdb" 29 "github.com/aswedchain/aswed/ethdb" 30 "github.com/aswedchain/aswed/log" 31 "github.com/aswedchain/aswed/metrics" 32 "github.com/steakknife/bloomfilter" 33 ) 34 35 var ( 36 bloomAddMeter = metrics.NewRegisteredMeter("trie/bloom/add", nil) 37 bloomLoadMeter = metrics.NewRegisteredMeter("trie/bloom/load", nil) 38 bloomTestMeter = metrics.NewRegisteredMeter("trie/bloom/test", nil) 39 bloomMissMeter = metrics.NewRegisteredMeter("trie/bloom/miss", nil) 40 bloomFaultMeter = metrics.NewRegisteredMeter("trie/bloom/fault", nil) 41 bloomErrorGauge = metrics.NewRegisteredGauge("trie/bloom/error", nil) 42 ) 43 44 // syncBloomHasher is a wrapper around a byte blob to satisfy the interface API 45 // requirements of the bloom library used. It's used to convert a trie hash or 46 // contract code hash into a 64 bit mini hash. 47 type syncBloomHasher []byte 48 49 func (f syncBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") } 50 func (f syncBloomHasher) Sum(b []byte) []byte { panic("not implemented") } 51 func (f syncBloomHasher) Reset() { panic("not implemented") } 52 func (f syncBloomHasher) BlockSize() int { panic("not implemented") } 53 func (f syncBloomHasher) Size() int { return 8 } 54 func (f syncBloomHasher) Sum64() uint64 { return binary.BigEndian.Uint64(f) } 55 56 // SyncBloom is a bloom filter used during fast sync to quickly decide if a trie 57 // node or contract code already exists on disk or not. It self populates from the 58 // provided disk database on creation in a background thread and will only start 59 // returning live results once that's finished. 60 type SyncBloom struct { 61 bloom *bloomfilter.Filter 62 inited uint32 63 closer sync.Once 64 closed uint32 65 pend sync.WaitGroup 66 } 67 68 // NewSyncBloom creates a new bloom filter of the given size (in megabytes) and 69 // initializes it from the database. The bloom is hard coded to use 3 filters. 70 func NewSyncBloom(memory uint64, database ethdb.Iteratee) *SyncBloom { 71 // Create the bloom filter to track known trie nodes 72 bloom, err := bloomfilter.New(memory*1024*1024*8, 3) 73 if err != nil { 74 panic(fmt.Sprintf("failed to create bloom: %v", err)) 75 } 76 log.Info("Allocated fast sync bloom", "size", common.StorageSize(memory*1024*1024)) 77 78 // Assemble the fast sync bloom and init it from previous sessions 79 b := &SyncBloom{ 80 bloom: bloom, 81 } 82 b.pend.Add(2) 83 go func() { 84 defer b.pend.Done() 85 b.init(database) 86 }() 87 go func() { 88 defer b.pend.Done() 89 b.meter() 90 }() 91 return b 92 } 93 94 // init iterates over the database, pushing every trie hash into the bloom filter. 95 func (b *SyncBloom) init(database ethdb.Iteratee) { 96 // Iterate over the database, but restart every now and again to avoid holding 97 // a persistent snapshot since fast sync can push a ton of data concurrently, 98 // bloating the disk. 99 // 100 // Note, this is fine, because everything inserted into leveldb by fast sync is 101 // also pushed into the bloom directly, so we're not missing anything when the 102 // iterator is swapped out for a new one. 103 it := database.NewIterator(nil, nil) 104 105 var ( 106 start = time.Now() 107 swap = time.Now() 108 ) 109 for it.Next() && atomic.LoadUint32(&b.closed) == 0 { 110 // If the database entry is a trie node, add it to the bloom 111 key := it.Key() 112 if len(key) == common.HashLength { 113 b.bloom.Add(syncBloomHasher(key)) 114 bloomLoadMeter.Mark(1) 115 } 116 // If the database entry is a contract code, add it to the bloom 117 if ok, hash := rawdb.IsCodeKey(key); ok { 118 b.bloom.Add(syncBloomHasher(hash)) 119 bloomLoadMeter.Mark(1) 120 } 121 // If enough time elapsed since the last iterator swap, restart 122 if time.Since(swap) > 8*time.Second { 123 key := common.CopyBytes(it.Key()) 124 125 it.Release() 126 it = database.NewIterator(nil, key) 127 128 log.Info("Initializing fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", common.PrettyDuration(time.Since(start))) 129 swap = time.Now() 130 } 131 } 132 it.Release() 133 134 // Mark the bloom filter inited and return 135 log.Info("Initialized fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", common.PrettyDuration(time.Since(start))) 136 atomic.StoreUint32(&b.inited, 1) 137 } 138 139 // meter periodically recalculates the false positive error rate of the bloom 140 // filter and reports it in a metric. 141 func (b *SyncBloom) meter() { 142 for { 143 // Report the current error ration. No floats, lame, scale it up. 144 bloomErrorGauge.Update(int64(b.errorRate() * 100000)) 145 146 // Wait one second, but check termination more frequently 147 for i := 0; i < 10; i++ { 148 if atomic.LoadUint32(&b.closed) == 1 { 149 return 150 } 151 time.Sleep(100 * time.Millisecond) 152 } 153 } 154 } 155 156 // Close terminates any background initializer still running and releases all the 157 // memory allocated for the bloom. 158 func (b *SyncBloom) Close() error { 159 b.closer.Do(func() { 160 // Ensure the initializer is stopped 161 atomic.StoreUint32(&b.closed, 1) 162 b.pend.Wait() 163 164 // Wipe the bloom, but mark it "uninited" just in case someone attempts an access 165 log.Info("Deallocated fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate()) 166 167 atomic.StoreUint32(&b.inited, 0) 168 b.bloom = nil 169 }) 170 return nil 171 } 172 173 // Add inserts a new trie node hash into the bloom filter. 174 func (b *SyncBloom) Add(hash []byte) { 175 if atomic.LoadUint32(&b.closed) == 1 { 176 return 177 } 178 b.bloom.Add(syncBloomHasher(hash)) 179 bloomAddMeter.Mark(1) 180 } 181 182 // Contains tests if the bloom filter contains the given hash: 183 // - false: the bloom definitely does not contain hash 184 // - true: the bloom maybe contains hash 185 // 186 // While the bloom is being initialized, any query will return true. 187 func (b *SyncBloom) Contains(hash []byte) bool { 188 bloomTestMeter.Mark(1) 189 if atomic.LoadUint32(&b.inited) == 0 { 190 // We didn't load all the trie nodes from the previous run of Geth yet. As 191 // such, we can't say for sure if a hash is not present for anything. Until 192 // the init is done, we're faking "possible presence" for everything. 193 return true 194 } 195 // Bloom initialized, check the real one and report any successful misses 196 maybe := b.bloom.Contains(syncBloomHasher(hash)) 197 if !maybe { 198 bloomMissMeter.Mark(1) 199 } 200 return maybe 201 } 202 203 // errorRate calculates the probability of a random containment test returning a 204 // false positive. 205 // 206 // We're calculating it ourselves because the bloom library we used missed a 207 // parentheses in the formula and calculates it wrong. And it's discontinued... 208 func (b *SyncBloom) errorRate() float64 { 209 k := float64(b.bloom.K()) 210 n := float64(b.bloom.N()) 211 m := float64(b.bloom.M()) 212 213 return math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k) 214 }