github.com/MetalBlockchain/subnet-evm@v0.4.9/sync/statesync/state_syncer.go

// (c) 2021-2022, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package statesync

import (
	"context"
	"fmt"
	"sync"

	"github.com/MetalBlockchain/subnet-evm/core/state/snapshot"
	"github.com/MetalBlockchain/subnet-evm/ethdb"
	syncclient "github.com/MetalBlockchain/subnet-evm/sync/client"
	"github.com/MetalBlockchain/subnet-evm/trie"
	"github.com/ethereum/go-ethereum/common"
	"golang.org/x/sync/errgroup"
)

const (
	segmentThreshold       = 500_000 // if we estimate a trie to have more than this number of leaves, split it
	numStorageTrieSegments = 4
	numMainTrieSegments    = 8
	defaultNumThreads      = 8
)

type StateSyncerConfig struct {
	Root                     common.Hash
	Client                   syncclient.Client
	DB                       ethdb.Database
	BatchSize                int
	MaxOutstandingCodeHashes int // Maximum number of code hashes in the code syncer queue
	NumCodeFetchingWorkers   int // Number of code syncing threads
}

// stateSync keeps the state of the entire state sync operation.
type stateSync struct {
	db        ethdb.Database    // database we are syncing
	root      common.Hash       // root of the EVM state we are syncing to
	trieDB    *trie.Database    // trieDB on top of db we are syncing; used to restore any existing tries
	snapshot  snapshot.Snapshot // used to access the database we are syncing as a snapshot
	batchSize int               // write batches when they reach this size
	client    syncclient.Client // used to contact peers over the network

	segments   chan syncclient.LeafSyncTask   // channel of tasks to sync
	syncer     *syncclient.CallbackLeafSyncer // performs the sync, looping over each task's range and invoking specified callbacks
	codeSyncer *codeSyncer                    // manages the asynchronous download and batching of code hashes
	trieQueue  *trieQueue                     // manages a persistent list of storage tries we need to sync and any segments that are created for them

	// track the main account trie specifically to commit its root at the end of the operation
	mainTrie *trieToSync

	// track the tries currently being synced
	lock            sync.RWMutex
	triesInProgress map[common.Hash]*trieToSync

	// track completion and progress of work
	mainTrieDone       chan struct{}
	triesInProgressSem chan struct{}
	done               chan error
	stats              *trieSyncStats
}

func NewStateSyncer(config *StateSyncerConfig) (*stateSync, error) {
	ss := &stateSync{
		batchSize:       config.BatchSize,
		db:              config.DB,
		client:          config.Client,
		root:            config.Root,
		trieDB:          trie.NewDatabase(config.DB),
		snapshot:        snapshot.NewDiskLayer(config.DB),
		stats:           newTrieSyncStats(),
		triesInProgress: make(map[common.Hash]*trieToSync),

		// [triesInProgressSem] is used to keep the number of tries syncing
		// less than or equal to [defaultNumThreads].
		triesInProgressSem: make(chan struct{}, defaultNumThreads),

		// Each [trieToSync] will have a maximum of [numSegments] segments.
		// We set the capacity of [segments] such that [defaultNumThreads]
		// storage tries can sync concurrently.
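		// With the constants above, that is defaultNumThreads (8) *
		// numStorageTrieSegments (4) = 32 buffered tasks.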
		segments:     make(chan syncclient.LeafSyncTask, defaultNumThreads*numStorageTrieSegments),
		mainTrieDone: make(chan struct{}),
		done:         make(chan error, 1),
	}
	ss.syncer = syncclient.NewCallbackLeafSyncer(config.Client, ss.segments)
	ss.codeSyncer = newCodeSyncer(CodeSyncerConfig{
		DB:                       config.DB,
		Client:                   config.Client,
		MaxOutstandingCodeHashes: config.MaxOutstandingCodeHashes,
		NumCodeFetchingWorkers:   config.NumCodeFetchingWorkers,
	})

	ss.trieQueue = NewTrieQueue(config.DB)
	if err := ss.trieQueue.clearIfRootDoesNotMatch(ss.root); err != nil {
		return nil, err
	}

	// create a trieToSync for the main trie and mark it as in progress.
	var err error
	ss.mainTrie, err = NewTrieToSync(ss, ss.root, common.Hash{}, NewMainTrieTask(ss))
	if err != nil {
		return nil, err
	}
	ss.addTrieInProgress(ss.root, ss.mainTrie)
	ss.mainTrie.startSyncing() // start syncing after tracking the trie as in progress
	return ss, nil
}

// onStorageTrieFinished is called after a storage trie finishes syncing.
func (t *stateSync) onStorageTrieFinished(root common.Hash) error {
	<-t.triesInProgressSem // allow another trie to start (release the semaphore)
	// mark the storage trie as done in trieQueue
	if err := t.trieQueue.StorageTrieDone(root); err != nil {
		return err
	}
	// track the completion of this storage trie
	return t.removeTrieInProgress(root)
}

// onMainTrieFinished is called after the main trie finishes syncing.
func (t *stateSync) onMainTrieFinished() error {
	t.codeSyncer.notifyAccountTrieCompleted()

	// count the number of storage tries we need to sync for ETA purposes.
	numStorageTries, err := t.trieQueue.countTries()
	if err != nil {
		return err
	}
	t.stats.setTriesRemaining(numStorageTries)

	// mark the main trie done
	close(t.mainTrieDone)
	return t.removeTrieInProgress(t.root)
}

// onSyncComplete is called after the account trie and
// all storage tries have completed syncing. We persist
// [mainTrie]'s batch last to avoid persisting the state
// root before all storage tries are done syncing.
func (t *stateSync) onSyncComplete() error {
	return t.mainTrie.batch.Write()
}

// storageTrieProducer waits for the main trie to finish
// syncing, then adds storage trie roots along with their
// corresponding accounts to the segments channel. It
// returns nil once all storage tries have been iterated,
// or an error if one occurred or the context expired.
func (t *stateSync) storageTrieProducer(ctx context.Context) error {
	// Wait for the main trie to finish so that when this thread terminates
	// there are no more storage tries to sync.
	select {
	case <-t.mainTrieDone:
	case <-ctx.Done():
		return ctx.Err()
	}

	for {
		// check ctx here to exit the loop early
		if err := ctx.Err(); err != nil {
			return err
		}

		root, accounts, more, err := t.trieQueue.getNextTrie()
		if err != nil {
			return err
		}
		// If there are no storage tries, then root will be the empty hash on the first pass.
		if root != (common.Hash{}) {
			// acquire the semaphore (to keep the number of tries in progress limited)
			select {
			case t.triesInProgressSem <- struct{}{}:
			case <-ctx.Done():
				return ctx.Err()
			}

			// Arbitrarily use the first account for making requests to the server.
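			// Every account in [accounts] has this same storage root, so the
			// choice of account does not affect which leaves are fetched.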
			// Note: getNextTrie guarantees that if a non-nil storage root is returned, then the
			// slice of account hashes is non-empty.
			syncAccount := accounts[0]
			// create a trieToSync for the storage trie and mark it as in progress.
			storageTrie, err := NewTrieToSync(t, root, syncAccount, NewStorageTrieTask(t, root, accounts))
			if err != nil {
				return err
			}
			t.addTrieInProgress(root, storageTrie)
			storageTrie.startSyncing() // start syncing after tracking the trie as in progress
		}
		// if there are no more storage tries, close
		// the task queue and exit the producer.
		if !more {
			close(t.segments)
			return nil
		}
	}
}

func (t *stateSync) Start(ctx context.Context) error {
	// Start the code syncer and leaf syncer.
	eg, egCtx := errgroup.WithContext(ctx)
	t.codeSyncer.start(egCtx) // start the code syncer first since the leaf syncer may add code tasks
	t.syncer.Start(egCtx, defaultNumThreads, t.onSyncFailure)
	eg.Go(func() error {
		if err := <-t.syncer.Done(); err != nil {
			return err
		}
		return t.onSyncComplete()
	})
	eg.Go(func() error {
		return <-t.codeSyncer.Done()
	})
	eg.Go(func() error {
		return t.storageTrieProducer(egCtx)
	})

	// The errgroup wait will take care of returning the first error that occurs,
	// or returning nil if all three goroutines finish without an error.
	go func() {
		t.done <- eg.Wait()
	}()
	return nil
}

func (t *stateSync) Done() <-chan error { return t.done }

// addTrieInProgress tracks the root as being currently synced.
func (t *stateSync) addTrieInProgress(root common.Hash, trie *trieToSync) {
	t.lock.Lock()
	defer t.lock.Unlock()

	t.triesInProgress[root] = trie
}

// removeTrieInProgress removes root from the set of tracked
// tries in progress and notifies the storage root producer
// so it can continue in case it was paused after the maximum
// number of tries in progress was reached.
func (t *stateSync) removeTrieInProgress(root common.Hash) error {
	t.lock.Lock()
	defer t.lock.Unlock()

	t.stats.trieDone(root)
	if _, ok := t.triesInProgress[root]; !ok {
		return fmt.Errorf("removeTrieInProgress for unexpected root: %s", root)
	}
	delete(t.triesInProgress, root)
	return nil
}

// onSyncFailure is called if the sync fails. It writes all
// batches of in-progress trie segments to disk to preserve
// as much progress as possible for a later resume.
func (t *stateSync) onSyncFailure(error) error {
	t.lock.RLock()
	defer t.lock.RUnlock()

	for _, trie := range t.triesInProgress {
		for _, segment := range trie.segments {
			if err := segment.batch.Write(); err != nil {
				return err
			}
		}
	}
	return nil
}
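
// The function below is not part of the original file. It is a minimal,
// hypothetical usage sketch showing how the pieces above fit together,
// assuming the caller already has a syncclient.Client, an ethdb.Database,
// and the target state root. The numeric config values are illustrative
// assumptions, not recommendations.
func runStateSync(ctx context.Context, client syncclient.Client, db ethdb.Database, root common.Hash) error {
	syncer, err := NewStateSyncer(&StateSyncerConfig{
		Root:                     root,
		Client:                   client,
		DB:                       db,
		BatchSize:                1024, // assumed write-batch size
		MaxOutstandingCodeHashes: 5000, // assumed bound on queued code hashes
		NumCodeFetchingWorkers:   5,    // assumed number of code fetching threads
	})
	if err != nil {
		return err
	}
	// Start launches the leaf syncer, code syncer, and storage trie producer,
	// then returns immediately; completion or failure is reported on Done.
	if err := syncer.Start(ctx); err != nil {
		return err
	}
	return <-syncer.Done()
}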