github.com/aigarnetwork/aigar@v0.0.0-20191115204914-d59a6eb70f8e/trie/sync_bloom.go (about) 1 // Copyright 2018 The go-ethereum Authors 2 // Copyright 2019 The go-aigar Authors 3 // This file is part of the go-aigar library. 4 // 5 // The go-aigar library is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Lesser General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // The go-aigar library is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Lesser General Public License for more details. 14 // 15 // You should have received a copy of the GNU Lesser General Public License 16 // along with the go-aigar library. If not, see <http://www.gnu.org/licenses/>. 17 18 package trie 19 20 import ( 21 "encoding/binary" 22 "fmt" 23 "math" 24 "sync" 25 "sync/atomic" 26 "time" 27 28 "github.com/AigarNetwork/aigar/common" 29 "github.com/AigarNetwork/aigar/ethdb" 30 "github.com/AigarNetwork/aigar/log" 31 "github.com/AigarNetwork/aigar/metrics" 32 "github.com/steakknife/bloomfilter" 33 ) 34 35 var ( 36 bloomAddMeter = metrics.NewRegisteredMeter("trie/bloom/add", nil) 37 bloomLoadMeter = metrics.NewRegisteredMeter("trie/bloom/load", nil) 38 bloomTestMeter = metrics.NewRegisteredMeter("trie/bloom/test", nil) 39 bloomMissMeter = metrics.NewRegisteredMeter("trie/bloom/miss", nil) 40 bloomFaultMeter = metrics.NewRegisteredMeter("trie/bloom/fault", nil) 41 bloomErrorGauge = metrics.NewRegisteredGauge("trie/bloom/error", nil) 42 ) 43 44 // syncBloomHasher is a wrapper around a byte blob to satisfy the interface API 45 // requirements of the bloom library used. It's used to convert a trie hash into 46 // a 64 bit mini hash. 47 type syncBloomHasher []byte 48 49 func (f syncBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") } 50 func (f syncBloomHasher) Sum(b []byte) []byte { panic("not implemented") } 51 func (f syncBloomHasher) Reset() { panic("not implemented") } 52 func (f syncBloomHasher) BlockSize() int { panic("not implemented") } 53 func (f syncBloomHasher) Size() int { return 8 } 54 func (f syncBloomHasher) Sum64() uint64 { return binary.BigEndian.Uint64(f) } 55 56 // SyncBloom is a bloom filter used during fast sync to quickly decide if a trie 57 // node already exists on disk or not. It self populates from the provided disk 58 // database on creation in a background thread and will only start returning live 59 // results once that's finished. 60 type SyncBloom struct { 61 bloom *bloomfilter.Filter 62 inited uint32 63 closer sync.Once 64 closed uint32 65 pend sync.WaitGroup 66 } 67 68 // NewSyncBloom creates a new bloom filter of the given size (in megabytes) and 69 // initializes it from the database. The bloom is hard coded to use 3 filters. 70 func NewSyncBloom(memory uint64, database ethdb.Iteratee) *SyncBloom { 71 // Create the bloom filter to track known trie nodes 72 bloom, err := bloomfilter.New(memory*1024*1024*8, 3) 73 if err != nil { 74 panic(fmt.Sprintf("failed to create bloom: %v", err)) 75 } 76 log.Info("Allocated fast sync bloom", "size", common.StorageSize(memory*1024*1024)) 77 78 // Assemble the fast sync bloom and init it from previous sessions 79 b := &SyncBloom{ 80 bloom: bloom, 81 } 82 b.pend.Add(2) 83 go func() { 84 defer b.pend.Done() 85 b.init(database) 86 }() 87 go func() { 88 defer b.pend.Done() 89 b.meter() 90 }() 91 return b 92 } 93 94 // init iterates over the database, pushing every trie hash into the bloom filter. 95 func (b *SyncBloom) init(database ethdb.Iteratee) { 96 // Iterate over the database, but restart every now and again to avoid holding 97 // a persistent snapshot since fast sync can push a ton of data concurrently, 98 // bloating the disk. 99 // 100 // Note, this is fine, because everything inserted into leveldb by fast sync is 101 // also pushed into the bloom directly, so we're not missing anything when the 102 // iterator is swapped out for a new one. 103 it := database.NewIterator() 104 105 var ( 106 start = time.Now() 107 swap = time.Now() 108 ) 109 for it.Next() && atomic.LoadUint32(&b.closed) == 0 { 110 // If the database entry is a trie node, add it to the bloom 111 if key := it.Key(); len(key) == common.HashLength { 112 b.bloom.Add(syncBloomHasher(key)) 113 bloomLoadMeter.Mark(1) 114 } 115 // If enough time elapsed since the last iterator swap, restart 116 if time.Since(swap) > 8*time.Second { 117 key := common.CopyBytes(it.Key()) 118 119 it.Release() 120 it = database.NewIteratorWithStart(key) 121 122 log.Info("Initializing fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", common.PrettyDuration(time.Since(start))) 123 swap = time.Now() 124 } 125 } 126 it.Release() 127 128 // Mark the bloom filter inited and return 129 log.Info("Initialized fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", common.PrettyDuration(time.Since(start))) 130 atomic.StoreUint32(&b.inited, 1) 131 } 132 133 // meter periodically recalculates the false positive error rate of the bloom 134 // filter and reports it in a metric. 135 func (b *SyncBloom) meter() { 136 for { 137 // Report the current error ration. No floats, lame, scale it up. 138 bloomErrorGauge.Update(int64(b.errorRate() * 100000)) 139 140 // Wait one second, but check termination more frequently 141 for i := 0; i < 10; i++ { 142 if atomic.LoadUint32(&b.closed) == 1 { 143 return 144 } 145 time.Sleep(100 * time.Millisecond) 146 } 147 } 148 } 149 150 // Close terminates any background initializer still running and releases all the 151 // memory allocated for the bloom. 152 func (b *SyncBloom) Close() error { 153 b.closer.Do(func() { 154 // Ensure the initializer is stopped 155 atomic.StoreUint32(&b.closed, 1) 156 b.pend.Wait() 157 158 // Wipe the bloom, but mark it "uninited" just in case someone attempts an access 159 log.Info("Deallocated fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate()) 160 161 atomic.StoreUint32(&b.inited, 0) 162 b.bloom = nil 163 }) 164 return nil 165 } 166 167 // Add inserts a new trie node hash into the bloom filter. 168 func (b *SyncBloom) Add(hash []byte) { 169 if atomic.LoadUint32(&b.closed) == 1 { 170 return 171 } 172 b.bloom.Add(syncBloomHasher(hash)) 173 bloomAddMeter.Mark(1) 174 } 175 176 // Contains tests if the bloom filter contains the given hash: 177 // - false: the bloom definitely does not contain hash 178 // - true: the bloom maybe contains hash 179 // 180 // While the bloom is being initialized, any query will return true. 181 func (b *SyncBloom) Contains(hash []byte) bool { 182 bloomTestMeter.Mark(1) 183 if atomic.LoadUint32(&b.inited) == 0 { 184 // We didn't load all the trie nodes from the previous run of Geth yet. As 185 // such, we can't say for sure if a hash is not present for anything. Until 186 // the init is done, we're faking "possible presence" for everything. 187 return true 188 } 189 // Bloom initialized, check the real one and report any successful misses 190 maybe := b.bloom.Contains(syncBloomHasher(hash)) 191 if !maybe { 192 bloomMissMeter.Mark(1) 193 } 194 return maybe 195 } 196 197 // errorRate calculates the probability of a random containment test returning a 198 // false positive. 199 // 200 // We're calculating it ourselves because the bloom library we used missed a 201 // parentheses in the formula and calculates it wrong. And it's discontinued... 202 func (b *SyncBloom) errorRate() float64 { 203 k := float64(b.bloom.K()) 204 n := float64(b.bloom.N()) 205 m := float64(b.bloom.M()) 206 207 return math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k) 208 }