// Source: github.com/core-coin/go-core/v2@v2.1.9/trie/sync_bloom.go

// Copyright 2019 by the Authors
// This file is part of the go-core library.
//
// The go-core library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-core library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-core library. If not, see <http://www.gnu.org/licenses/>.

package trie

import (
	"encoding/binary"
	"fmt"
	"math"
	"sync"
	"sync/atomic"
	"time"

	"github.com/steakknife/bloomfilter"

	"github.com/core-coin/go-core/v2/xcbdb"

	"github.com/core-coin/go-core/v2/common"
	"github.com/core-coin/go-core/v2/core/rawdb"
	"github.com/core-coin/go-core/v2/log"
	"github.com/core-coin/go-core/v2/metrics"
)

// Metrics describing the activity of the fast sync bloom filter. They are
// registered once, at package initialization.
var (
	// bloomAddMeter is marked once per hash inserted through SyncBloom.Add.
	bloomAddMeter = metrics.NewRegisteredMeter("trie/bloom/add", nil)
	// bloomLoadMeter is marked once per hash loaded from the database while
	// the filter is being initialized.
	bloomLoadMeter = metrics.NewRegisteredMeter("trie/bloom/load", nil)
	// bloomTestMeter is marked once per SyncBloom.Contains call.
	bloomTestMeter = metrics.NewRegisteredMeter("trie/bloom/test", nil)
	// bloomMissMeter is marked whenever an initialized filter reports that a
	// hash is definitely absent.
	bloomMissMeter = metrics.NewRegisteredMeter("trie/bloom/miss", nil)
	// bloomFaultMeter is not marked anywhere in this file.
	// NOTE(review): presumably marked by callers when a positive answer from
	// the filter turns out to be false - confirm against the users of SyncBloom.
	bloomFaultMeter = metrics.NewRegisteredMeter("trie/bloom/fault", nil)
	// bloomErrorGauge holds the filter's false positive rate scaled up by
	// 100000, as reported periodically by SyncBloom.meter.
	bloomErrorGauge = metrics.NewRegisteredGauge("trie/bloom/error", nil)
)

// syncBloomHasher is a wrapper around a byte blob to satisfy the interface API
// requirements of the bloom library used. It's used to convert a trie hash or
// contract code hash into a 64 bit mini hash.
49 type syncBloomHasher []byte 50 51 func (f syncBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") } 52 func (f syncBloomHasher) Sum(b []byte) []byte { panic("not implemented") } 53 func (f syncBloomHasher) Reset() { panic("not implemented") } 54 func (f syncBloomHasher) BlockSize() int { panic("not implemented") } 55 func (f syncBloomHasher) Size() int { return 8 } 56 func (f syncBloomHasher) Sum64() uint64 { return binary.BigEndian.Uint64(f) } 57 58 // SyncBloom is a bloom filter used during fast sync to quickly decide if a trie 59 // node or contract code already exists on disk or not. It self populates from the 60 // provided disk database on creation in a background thread and will only start 61 // returning live results once that's finished. 62 type SyncBloom struct { 63 bloom *bloomfilter.Filter 64 inited uint32 65 closer sync.Once 66 closed uint32 67 pend sync.WaitGroup 68 } 69 70 // NewSyncBloom creates a new bloom filter of the given size (in megabytes) and 71 // initializes it from the database. The bloom is hard coded to use 3 filters. 72 func NewSyncBloom(memory uint64, database xcbdb.Iteratee) *SyncBloom { 73 // Create the bloom filter to track known trie nodes 74 bloom, err := bloomfilter.New(memory*1024*1024*8, 3) 75 if err != nil { 76 panic(fmt.Sprintf("failed to create bloom: %v", err)) 77 } 78 log.Info("Allocated fast sync bloom", "size", common.StorageSize(memory*1024*1024)) 79 80 // Assemble the fast sync bloom and init it from previous sessions 81 b := &SyncBloom{ 82 bloom: bloom, 83 } 84 b.pend.Add(2) 85 go func() { 86 defer b.pend.Done() 87 b.init(database) 88 }() 89 go func() { 90 defer b.pend.Done() 91 b.meter() 92 }() 93 return b 94 } 95 96 // init iterates over the database, pushing every trie hash into the bloom filter. 
97 func (b *SyncBloom) init(database xcbdb.Iteratee) { 98 // Iterate over the database, but restart every now and again to avoid holding 99 // a persistent snapshot since fast sync can push a ton of data concurrently, 100 // bloating the disk. 101 // 102 // Note, this is fine, because everything inserted into leveldb by fast sync is 103 // also pushed into the bloom directly, so we're not missing anything when the 104 // iterator is swapped out for a new one. 105 it := database.NewIterator(nil, nil) 106 107 var ( 108 start = time.Now() 109 swap = time.Now() 110 ) 111 for it.Next() && atomic.LoadUint32(&b.closed) == 0 { 112 // If the database entry is a trie node, add it to the bloom 113 key := it.Key() 114 if len(key) == common.HashLength { 115 b.bloom.Add(syncBloomHasher(key)) 116 bloomLoadMeter.Mark(1) 117 } 118 // If the database entry is a contract code, add it to the bloom 119 if ok, hash := rawdb.IsCodeKey(key); ok { 120 b.bloom.Add(syncBloomHasher(hash)) 121 bloomLoadMeter.Mark(1) 122 } 123 // If enough time elapsed since the last iterator swap, restart 124 if time.Since(swap) > 8*time.Second { 125 key := common.CopyBytes(it.Key()) 126 127 it.Release() 128 it = database.NewIterator(nil, key) 129 130 log.Info("Initializing fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", common.PrettyDuration(time.Since(start))) 131 swap = time.Now() 132 } 133 } 134 it.Release() 135 136 // Mark the bloom filter inited and return 137 log.Info("Initialized fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", common.PrettyDuration(time.Since(start))) 138 atomic.StoreUint32(&b.inited, 1) 139 } 140 141 // meter periodically recalculates the false positive error rate of the bloom 142 // filter and reports it in a metric. 143 func (b *SyncBloom) meter() { 144 for { 145 // Report the current error ration. No floats, lame, scale it up. 
146 bloomErrorGauge.Update(int64(b.errorRate() * 100000)) 147 148 // Wait one second, but check termination more frequently 149 for i := 0; i < 10; i++ { 150 if atomic.LoadUint32(&b.closed) == 1 { 151 return 152 } 153 time.Sleep(100 * time.Millisecond) 154 } 155 } 156 } 157 158 // Close terminates any background initializer still running and releases all the 159 // memory allocated for the bloom. 160 func (b *SyncBloom) Close() error { 161 b.closer.Do(func() { 162 // Ensure the initializer is stopped 163 atomic.StoreUint32(&b.closed, 1) 164 b.pend.Wait() 165 166 // Wipe the bloom, but mark it "uninited" just in case someone attempts an access 167 log.Info("Deallocated fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate()) 168 169 atomic.StoreUint32(&b.inited, 0) 170 b.bloom = nil 171 }) 172 return nil 173 } 174 175 // Add inserts a new trie node hash into the bloom filter. 176 func (b *SyncBloom) Add(hash []byte) { 177 if atomic.LoadUint32(&b.closed) == 1 { 178 return 179 } 180 b.bloom.Add(syncBloomHasher(hash)) 181 bloomAddMeter.Mark(1) 182 } 183 184 // Contains tests if the bloom filter contains the given hash: 185 // - false: the bloom definitely does not contain hash 186 // - true: the bloom maybe contains hash 187 // 188 // While the bloom is being initialized, any query will return true. 189 func (b *SyncBloom) Contains(hash []byte) bool { 190 bloomTestMeter.Mark(1) 191 if atomic.LoadUint32(&b.inited) == 0 { 192 // We didn't load all the trie nodes from the previous run of Gocore yet. As 193 // such, we can't say for sure if a hash is not present for anything. Until 194 // the init is done, we're faking "possible presence" for everything. 
195 return true 196 } 197 // Bloom initialized, check the real one and report any successful misses 198 maybe := b.bloom.Contains(syncBloomHasher(hash)) 199 if !maybe { 200 bloomMissMeter.Mark(1) 201 } 202 return maybe 203 } 204 205 // errorRate calculates the probability of a random containment test returning a 206 // false positive. 207 // 208 // We're calculating it ourselves because the bloom library we used missed a 209 // parentheses in the formula and calculates it wrong. And it's discontinued... 210 func (b *SyncBloom) errorRate() float64 { 211 k := float64(b.bloom.K()) 212 n := float64(b.bloom.N()) 213 m := float64(b.bloom.M()) 214 215 return math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k) 216 }