github.com/chain5j/chain5j-pkg@v1.0.7/collection/trees/tree/sync_bloom.go (about) 1 // Copyright 2019 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package tree 18 19 import ( 20 "encoding/binary" 21 "fmt" 22 "math" 23 "sync" 24 "sync/atomic" 25 "time" 26 27 "github.com/chain5j/chain5j-pkg/database/kvstore" 28 "github.com/chain5j/chain5j-pkg/types" 29 "github.com/chain5j/chain5j-pkg/util/dateutil" 30 "github.com/chain5j/chain5j-pkg/util/hexutil" 31 "github.com/steakknife/bloomfilter" 32 ) 33 34 // syncBloomHasher is a wrapper around a byte blob to satisfy the interface API 35 // requirements of the bloom library used. It's used to convert a trie hash into 36 // a 64 bit mini hash. 37 type syncBloomHasher []byte 38 39 func (f syncBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") } 40 func (f syncBloomHasher) Sum(b []byte) []byte { panic("not implemented") } 41 func (f syncBloomHasher) Reset() { panic("not implemented") } 42 func (f syncBloomHasher) BlockSize() int { panic("not implemented") } 43 func (f syncBloomHasher) Size() int { return 8 } 44 func (f syncBloomHasher) Sum64() uint64 { return binary.BigEndian.Uint64(f) } 45 46 // SyncBloom is a bloom filter used during fast sync to quickly decide if a trie 47 // node already exists on disk or not. It self populates from the provided disk 48 // database on creation in a background thread and will only start returning live 49 // results once that's finished. 50 type SyncBloom struct { 51 bloom *bloomfilter.Filter 52 inited uint32 53 closer sync.Once 54 closed uint32 55 pend sync.WaitGroup 56 } 57 58 // NewSyncBloom creates a new bloom filter of the given size (in megabytes) and 59 // initializes it from the database. The bloom is hard coded to use 3 filters. 60 func NewSyncBloom(memory uint64, database kvstore.Iteratee) *SyncBloom { 61 // Create the bloom filter to track known trie nodes 62 bloom, err := bloomfilter.New(memory*1024*1024*8, 3) 63 if err != nil { 64 panic(fmt.Sprintf("failed to create bloom: %v", err)) 65 } 66 logger().Info("Allocated fast sync bloom", "size", types.StorageSize(memory*1024*1024)) 67 68 // Assemble the fast sync bloom and init it from previous sessions 69 b := &SyncBloom{ 70 bloom: bloom, 71 } 72 b.pend.Add(2) 73 go func() { 74 defer b.pend.Done() 75 b.init(database) 76 }() 77 go func() { 78 defer b.pend.Done() 79 b.meter() 80 }() 81 return b 82 } 83 84 // init iterates over the database, pushing every trie hash into the bloom filter. 85 func (b *SyncBloom) init(database kvstore.Iteratee) { 86 // Iterate over the database, but restart every now and again to avoid holding 87 // a persistent snapshot since fast sync can push a ton of data concurrently, 88 // bloating the disk. 89 // 90 // Note, this is fine, because everything inserted into leveldb by fast sync is 91 // also pushed into the bloom directly, so we're not missing anything when the 92 // iterator is swapped out for a new one. 93 it := database.NewIterator() 94 95 var ( 96 start = time.Now() 97 swap = time.Now() 98 ) 99 for it.Next() && atomic.LoadUint32(&b.closed) == 0 { 100 // If the database entry is a trie node, add it to the bloom 101 if key := it.Key(); len(key) == types.HashLength { 102 b.bloom.Add(syncBloomHasher(key)) 103 } 104 // If enough time elapsed since the last iterator swap, restart 105 if time.Since(swap) > 8*time.Second { 106 key := hexutil.CopyBytes(it.Key()) 107 108 it.Release() 109 it = database.NewIteratorWithStart(key) 110 111 logger().Info("Initializing fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", dateutil.PrettyDuration(time.Since(start))) 112 swap = time.Now() 113 } 114 } 115 it.Release() 116 117 // Mark the bloom filter inited and return 118 logger().Info("Initialized fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", dateutil.PrettyDuration(time.Since(start))) 119 atomic.StoreUint32(&b.inited, 1) 120 } 121 122 // meter periodically recalculates the false positive error rate of the bloom 123 // filter and reports it in a metric. 124 func (b *SyncBloom) meter() { 125 for { 126 // Wait one second, but check termination more frequently 127 for i := 0; i < 10; i++ { 128 if atomic.LoadUint32(&b.closed) == 1 { 129 return 130 } 131 time.Sleep(100 * time.Millisecond) 132 } 133 } 134 } 135 136 // Close terminates any background initializer still running and releases all the 137 // memory allocated for the bloom. 138 func (b *SyncBloom) Close() error { 139 b.closer.Do(func() { 140 // Ensure the initializer is stopped 141 atomic.StoreUint32(&b.closed, 1) 142 b.pend.Wait() 143 144 // Wipe the bloom, but mark it "uninited" just in case someone attempts an access 145 logger().Info("Deallocated fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate()) 146 147 atomic.StoreUint32(&b.inited, 0) 148 b.bloom = nil 149 }) 150 return nil 151 } 152 153 // Add inserts a new trie node hash into the bloom filter. 154 func (b *SyncBloom) Add(hash []byte) { 155 if atomic.LoadUint32(&b.closed) == 1 { 156 return 157 } 158 b.bloom.Add(syncBloomHasher(hash)) 159 } 160 161 // Contains tests if the bloom filter contains the given hash: 162 // - false: the bloom definitely does not contain hash 163 // - true: the bloom maybe contains hash 164 // 165 // While the bloom is being initialized, any query will return true. 166 func (b *SyncBloom) Contains(hash []byte) bool { 167 if atomic.LoadUint32(&b.inited) == 0 { 168 // We didn't load all the trie nodes from the previous run of Geth yet. As 169 // such, we can't say for sure if a hash is not present for anything. Until 170 // the init is done, we're faking "possible presence" for everything. 171 return true 172 } 173 // Bloom initialized, check the real one and report any successful misses 174 maybe := b.bloom.Contains(syncBloomHasher(hash)) 175 return maybe 176 } 177 178 // errorRate calculates the probability of a random containment test returning a 179 // false positive. 180 // 181 // We're calculating it ourselves because the bloom library we used missed a 182 // parentheses in the formula and calculates it wrong. And it's discontinued... 183 func (b *SyncBloom) errorRate() float64 { 184 k := float64(b.bloom.K()) 185 n := float64(b.bloom.N()) 186 m := float64(b.bloom.M()) 187 188 return math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k) 189 }