// Source: github.com/klaytn/klaytn@v1.12.1/storage/statedb/sync_bloom.go

// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
//
// This file is derived from trie/sync_test.go (2020/05/20).
// NOTE(review): presumably trie/sync_bloom.go was meant, since this file is not
// test code — confirm against the upstream repository.
// Modified and improved for the klaytn development.
package statedb

import (
	"encoding/binary"
	"fmt"
	"math"
	"sync"
	"sync/atomic"
	"time"

	"github.com/klaytn/klaytn/common"
	"github.com/klaytn/klaytn/log"
	"github.com/klaytn/klaytn/storage/database"
	"github.com/rcrowley/go-metrics"

	"github.com/steakknife/bloomfilter"
)

// Metrics published by the sync bloom:
//   - bloomAddMeter:   marked once per hash inserted through Add.
//   - bloomLoadMeter:  marked once per database entry loaded by init.
//   - bloomTestMeter:  marked once per Contains call.
//   - bloomMissMeter:  marked when Contains reports a definite miss.
//   - bloomFaultMeter: registered here but not updated anywhere in this file.
//   - bloomErrorGauge: false positive rate scaled by 100000, updated by meter.
var (
	bloomAddMeter   = metrics.NewRegisteredMeter("trie/bloom/add", nil)
	bloomLoadMeter  = metrics.NewRegisteredMeter("trie/bloom/load", nil)
	bloomTestMeter  = metrics.NewRegisteredMeter("trie/bloom/test", nil)
	bloomMissMeter  = metrics.NewRegisteredMeter("trie/bloom/miss", nil)
	bloomFaultMeter = metrics.NewRegisteredMeter("trie/bloom/fault", nil)
	bloomErrorGauge = metrics.NewRegisteredGauge("trie/bloom/error", nil)
)

// syncBloomHasher is a wrapper around a byte blob to satisfy the interface API
// requirements of the bloom library used.
It's used to convert a trie hash or 48 // contract code hash into a 64 bit mini hash. 49 type syncBloomHasher []byte 50 51 func (f syncBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") } 52 func (f syncBloomHasher) Sum(b []byte) []byte { panic("not implemented") } 53 func (f syncBloomHasher) Reset() { panic("not implemented") } 54 func (f syncBloomHasher) BlockSize() int { panic("not implemented") } 55 func (f syncBloomHasher) Size() int { return 8 } 56 func (f syncBloomHasher) Sum64() uint64 { return binary.BigEndian.Uint64(f) } 57 58 // SyncBloom is a bloom filter used during fast sync to quickly decide if a trie 59 // node or contract code already exists on disk or not. It self populates from the 60 // provided disk database on creation in a background thread and will only start 61 // returning live results once that's finished. 62 type SyncBloom struct { 63 bloom *bloomfilter.Filter 64 inited uint32 65 closer sync.Once 66 closed uint32 67 pend sync.WaitGroup 68 } 69 70 // NewSyncBloom creates a new bloom filter of the given size (in megabytes) and 71 // initializes it from the database. The bloom is hard coded to use 3 filters. 72 func NewSyncBloom(memory uint64, database database.Iteratee) *SyncBloom { 73 // Create the bloom filter to track known trie nodes 74 bloom, err := bloomfilter.New(memory*1024*1024*8, 3) 75 if err != nil { 76 panic(fmt.Sprintf("failed to create bloom: %v", err)) 77 } 78 logger.Info("Allocated fast sync bloom", "size", common.StorageSize(memory*1024*1024)) 79 80 // Assemble the fast sync bloom and init it from previous sessions 81 b := &SyncBloom{ 82 bloom: bloom, 83 } 84 b.pend.Add(2) 85 go func() { 86 defer b.pend.Done() 87 b.init(database) 88 }() 89 go func() { 90 defer b.pend.Done() 91 b.meter() 92 }() 93 return b 94 } 95 96 // init iterates over the database, pushing every trie hash into the bloom filter. 
97 func (b *SyncBloom) init(db database.Iteratee) { 98 // Iterate over the database, but restart every now and again to avoid holding 99 // a persistent snapshot since fast sync can push a ton of data concurrently, 100 // bloating the disk. 101 // 102 // Note, this is fine, because everything inserted into leveldb by fast sync is 103 // also pushed into the bloom directly, so we're not missing anything when the 104 // iterator is swapped out for a new one. 105 it := db.NewIterator(nil, nil) 106 107 var ( 108 start = time.Now() 109 swap = time.Now() 110 ) 111 for it.Next() && atomic.LoadUint32(&b.closed) == 0 { 112 // If the database entry is a trie node, add it to the bloom 113 key := it.Key() 114 if len(key) == common.HashLength { 115 b.bloom.Add(syncBloomHasher(key)) 116 bloomLoadMeter.Mark(1) 117 } 118 // If the database entry is a contract code, add it to the bloom 119 if ok, hash := database.IsCodeKey(key); ok { 120 b.bloom.Add(syncBloomHasher(hash)) 121 bloomLoadMeter.Mark(1) 122 } 123 124 // If enough time elapsed since the last iterator swap, restart 125 if time.Since(swap) > log.StatsReportLimit { 126 key := common.CopyBytes(it.Key()) 127 128 it.Release() 129 it = db.NewIterator(nil, key) 130 131 logger.Info("Initializing fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", common.PrettyDuration(time.Since(start))) 132 swap = time.Now() 133 } 134 } 135 it.Release() 136 137 // Mark the bloom filter inited and return 138 logger.Info("Initialized fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", common.PrettyDuration(time.Since(start))) 139 atomic.StoreUint32(&b.inited, 1) 140 } 141 142 // meter periodically recalculates the false positive error rate of the bloom 143 // filter and reports it in a metric. 144 func (b *SyncBloom) meter() { 145 for { 146 // Report the current error ration. No floats, lame, scale it up. 
147 bloomErrorGauge.Update(int64(b.errorRate() * 100000)) 148 149 // Wait one second, but check termination more frequently 150 for i := 0; i < 10; i++ { 151 if atomic.LoadUint32(&b.closed) == 1 { 152 return 153 } 154 time.Sleep(100 * time.Millisecond) 155 } 156 } 157 } 158 159 // Close terminates any background initializer still running and releases all the 160 // memory allocated for the bloom. 161 func (b *SyncBloom) Close() error { 162 b.closer.Do(func() { 163 // Ensure the initializer is stopped 164 atomic.StoreUint32(&b.closed, 1) 165 b.pend.Wait() 166 167 // Wipe the bloom, but mark it "uninited" just in case someone attempts an access 168 logger.Info("Deallocated fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate()) 169 170 atomic.StoreUint32(&b.inited, 0) 171 b.bloom = nil 172 }) 173 return nil 174 } 175 176 // Add inserts a new trie node hash into the bloom filter. 177 func (b *SyncBloom) Add(hash []byte) { 178 if atomic.LoadUint32(&b.closed) == 1 { 179 return 180 } 181 b.bloom.Add(syncBloomHasher(hash)) 182 bloomAddMeter.Mark(1) 183 } 184 185 // Contains tests if the bloom filter contains the given hash: 186 // - false: the bloom definitely does not contain hash 187 // - true: the bloom maybe contains hash 188 // 189 // While the bloom is being initialized, any query will return true. 190 func (b *SyncBloom) Contains(hash []byte) bool { 191 bloomTestMeter.Mark(1) 192 if atomic.LoadUint32(&b.inited) == 0 { 193 // We didn't load all the trie nodes from the previous run of Geth yet. As 194 // such, we can't say for sure if a hash is not present for anything. Until 195 // the init is done, we're faking "possible presence" for everything. 
196 return true 197 } 198 // Bloom initialized, check the real one and report any successful misses 199 maybe := b.bloom.Contains(syncBloomHasher(hash)) 200 if !maybe { 201 bloomMissMeter.Mark(1) 202 } 203 return maybe 204 } 205 206 // errorRate calculates the probability of a random containment test returning a 207 // false positive. 208 // 209 // We're calculating it ourselves because the bloom library we used missed a 210 // parentheses in the formula and calculates it wrong. And it's discontinued... 211 func (b *SyncBloom) errorRate() float64 { 212 k := float64(b.bloom.K()) 213 n := float64(b.bloom.N()) 214 m := float64(b.bloom.M()) 215 216 return math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k) 217 }