github.com/MetalBlockchain/subnet-evm@v0.4.9/sync/statesync/trie_segments.go (about) 1 // (c) 2021-2022, Ava Labs, Inc. All rights reserved. 2 // See the file LICENSE for licensing terms. 3 4 package statesync 5 6 import ( 7 "bytes" 8 "encoding/binary" 9 "fmt" 10 "sync" 11 12 "github.com/MetalBlockchain/metalgo/utils/wrappers" 13 "github.com/MetalBlockchain/subnet-evm/core/rawdb" 14 "github.com/MetalBlockchain/subnet-evm/ethdb" 15 syncclient "github.com/MetalBlockchain/subnet-evm/sync/client" 16 "github.com/MetalBlockchain/subnet-evm/trie" 17 "github.com/MetalBlockchain/subnet-evm/utils" 18 "github.com/ethereum/go-ethereum/common" 19 "github.com/ethereum/go-ethereum/log" 20 ) 21 22 var ( 23 _ syncclient.LeafSyncTask = &trieSegment{} 24 _ fmt.Stringer = &trieSegment{} 25 ) 26 27 // trieToSync keeps the state of a single trie syncing 28 // this can be a storage or the main trie. 29 type trieToSync struct { 30 root common.Hash 31 account common.Hash 32 33 // The trie consists of a slice of segments. each 34 // segment has a start and end range of keys, and 35 // contains a pointer back to this struct. 36 segments []*trieSegment 37 38 // These fields are used to hash the segments in 39 // order, even though they may finish syncing out 40 // of order or concurrently. 41 lock sync.Mutex 42 segmentsDone map[int]struct{} 43 segmentToHashNext int 44 45 // We use a stack trie to hash the leafs and have 46 // a batch used for writing it to disk. 47 batch ethdb.Batch 48 stackTrie *trie.StackTrie 49 50 // We keep a pointer to the overall sync operation, 51 // used to add segments to the work queue and to 52 // update the eta. 53 sync *stateSync 54 55 // task implements the syncTask interface with methods 56 // containing logic specific to the main trie or storage 57 // tries. 58 task syncTask 59 isMainTrie bool 60 } 61 62 // NewTrieToSync initializes a trieToSync and restores any previously started segments. 63 func NewTrieToSync(sync *stateSync, root common.Hash, account common.Hash, syncTask syncTask) (*trieToSync, error) { 64 batch := sync.db.NewBatch() 65 trieToSync := &trieToSync{ 66 sync: sync, 67 root: root, 68 account: account, 69 batch: batch, 70 stackTrie: trie.NewStackTrie(batch), 71 isMainTrie: (root == sync.root), 72 task: syncTask, 73 segmentsDone: make(map[int]struct{}), 74 } 75 return trieToSync, trieToSync.loadSegments() 76 } 77 78 // loadSegments reads persistent storage and initializes trieSegments that 79 // had been previously started and need to be resumed. 80 func (t *trieToSync) loadSegments() error { 81 // Get an iterator for segments for t.root and see if we find anything. 82 // This lets us check if this trie was previously segmented, in which 83 // case we need to restore the same segments on resume. 84 it := rawdb.NewSyncSegmentsIterator(t.sync.db, t.root) 85 defer it.Release() 86 87 // Track the previously added segment as we loop over persisted values. 88 var prevSegmentStart []byte 89 90 for it.Next() { 91 // If we find any persisted segments with the specified 92 // prefix, we add a new segment to the trie here. 93 // The segment we add represents a segment ending at the 94 // key immediately prior to the segment we found on disk. 95 // This is because we do not persist the beginning of 96 // the first segment. 97 _, segmentStart := rawdb.UnpackSyncSegmentKey(it.Key()) 98 segmentStartPos := binary.BigEndian.Uint16(segmentStart[:wrappers.ShortLen]) 99 t.addSegment(prevSegmentStart, addPadding(segmentStartPos-1, 0xff)) 100 101 // keep tracking the previous segment 102 prevSegmentStart = segmentStart 103 } 104 if err := it.Error(); err != nil { 105 return err 106 } 107 108 // this creates the last segment if any were found in the loop 109 // and also handles the case where there were no segments persisted to disk. 110 t.addSegment(prevSegmentStart, nil) 111 112 for _, segment := range t.segments { 113 // for each segment we need to find the last key already persisted 114 // so syncing can begin at the subsequent key 115 var lastKey []byte 116 it := segment.trie.task.IterateLeafs(common.BytesToHash(segment.start)) 117 defer it.Release() 118 for it.Next() { 119 if len(segment.end) > 0 && bytes.Compare(it.Key(), segment.end) > 0 { 120 // don't go past the end of the segment 121 break 122 } 123 lastKey = common.CopyBytes(it.Key()) 124 segment.leafs++ 125 } 126 if lastKey != nil { 127 utils.IncrOne(lastKey) 128 segment.pos = lastKey // syncing will start from this key 129 } 130 log.Debug("statesync: loading segment", "segment", segment) 131 } 132 return it.Error() 133 } 134 135 // startSyncing adds the trieToSync's segments to the work queue 136 func (t *trieToSync) startSyncing() { 137 for _, segment := range t.segments { 138 t.sync.segments <- segment // this will queue the segment for syncing 139 } 140 } 141 142 // addSegment appends a newly created segment specified by [start] and 143 // [end] to [t.segments] and returns it. 144 // note: addSegment does not take a lock and therefore is called only 145 // before multiple segments are syncing concurrently. 146 func (t *trieToSync) addSegment(start, end []byte) *trieSegment { 147 segment := &trieSegment{ 148 start: start, 149 end: end, 150 trie: t, 151 idx: len(t.segments), 152 batch: t.sync.db.NewBatch(), 153 } 154 t.segments = append(t.segments, segment) 155 return segment 156 } 157 158 // segmentFinished is called when one the trie segment with index [idx] finishes syncing. 159 // creates intermediary hash nodes for the trie up to the last contiguous segment received from start. 160 func (t *trieToSync) segmentFinished(idx int) error { 161 t.lock.Lock() 162 defer t.lock.Unlock() 163 164 log.Debug("statesync: segment finished", "segment", t.segments[idx]) 165 t.segmentsDone[idx] = struct{}{} 166 for { 167 if _, ok := t.segmentsDone[t.segmentToHashNext]; !ok { 168 // if not the next contiguous segment from the beginning of the trie 169 // don't do anything. 170 break 171 } 172 segment := t.segments[t.segmentToHashNext] 173 174 // persist any items in the batch as they will be iterated below. 175 if err := segment.batch.Write(); err != nil { 176 return err 177 } 178 segment.batch.Reset() // reset the batch to free memory (even though it is no longer used) 179 180 // iterate all the items from the start of the segment (end is checked in the loop) 181 it := t.task.IterateLeafs(common.BytesToHash(segment.start)) 182 defer it.Release() 183 184 for it.Next() { 185 if len(segment.end) > 0 && bytes.Compare(it.Key(), segment.end) > 0 { 186 // don't go past the end of the segment. (data belongs to the next segment) 187 break 188 } 189 // update the stack trie and cap the batch it writes to. 190 value := common.CopyBytes(it.Value()) 191 if err := t.stackTrie.TryUpdate(it.Key(), value); err != nil { 192 return err 193 } 194 if t.batch.ValueSize() > t.sync.batchSize { 195 if err := t.batch.Write(); err != nil { 196 return err 197 } 198 t.batch.Reset() 199 } 200 } 201 if err := it.Error(); err != nil { 202 return err 203 } 204 t.segmentToHashNext++ 205 } 206 if t.segmentToHashNext < len(t.segments) { 207 // trie not complete 208 return nil 209 } 210 211 // when the trie is finished, this hashes any remaining nodes in the stack 212 // trie and creates the root 213 actualRoot, err := t.stackTrie.Commit() 214 if err != nil { 215 return err 216 } 217 if actualRoot != t.root { 218 return fmt.Errorf("unexpected root, expected=%s, actual=%s, account=%s", t.root, actualRoot, t.account) 219 } 220 if !t.isMainTrie { 221 // the batch containing the main trie's root will be committed on 222 // sync completion. 223 if err := t.batch.Write(); err != nil { 224 return err 225 } 226 } 227 228 // remove all segments for this root from persistent storage 229 if err := rawdb.ClearSyncSegments(t.sync.db, t.root); err != nil { 230 return err 231 } 232 return t.task.OnFinish() 233 } 234 235 // createSegmentsIfNeeded is called from the leaf handler. In case the trie syncing only has 236 // one segment but a large number of leafs ([t.estimateSize() > segmentThreshold], it will 237 // create [numSegments-1] additional segments to sync the trie. 238 func (t *trieToSync) createSegmentsIfNeeded(numSegments int) error { 239 if !t.shouldSegment() { 240 return nil 241 } 242 243 return t.createSegments(numSegments) 244 } 245 246 // shouldSegment returns true if a trie should be separated into segments. 247 func (t *trieToSync) shouldSegment() bool { 248 t.lock.Lock() 249 defer t.lock.Unlock() 250 251 // Return false if the trie has already been segmented. 252 if len(t.segments) > 1 { 253 return false 254 } 255 256 // Return true iff the estimated size of the trie exceeds [segmentThreshold]. 257 // Note: at this point there is only a single segment (loadSegments guarantees there 258 // is at least one segment). 259 segment := t.segments[0] 260 return segment.estimateSize() >= uint64(segmentThreshold) 261 } 262 263 // divide the key space into [numSegments] consecutive segments. 264 // we use 2 bytes to build the ranges and fill the rest with 265 // ones or zeroes accordingly. 266 // this represents the step between the first 2 bytes of the start 267 // key of consecutive segments. 268 // createSegments should only be called once when there is only one 269 // thread accessing this trie, such that there is no need to hold a lock. 270 func (t *trieToSync) createSegments(numSegments int) error { 271 segment := t.segments[0] 272 273 segmentStep := 0x10000 / numSegments 274 275 for i := 0; i < numSegments; i++ { 276 start := uint16(i * segmentStep) 277 end := uint16(i*segmentStep + (segmentStep - 1)) 278 279 startBytes := addPadding(start, 0x00) 280 endBytes := addPadding(end, 0xff) 281 282 // Skip any portion of the trie that has already been synced. 283 if bytes.Compare(segment.pos, endBytes) >= 0 { 284 continue 285 } 286 287 // since the first segment is already syncing, 288 // it does not need to be added to the task queue. 289 // instead, we update its end and move on to creating 290 // the next segment 291 if segment.end == nil { 292 segment.end = endBytes 293 continue 294 } 295 296 // create the segments 297 segment := t.addSegment(startBytes, endBytes) 298 if err := rawdb.WriteSyncSegment(t.sync.db, t.root, segment.start); err != nil { 299 return err 300 } 301 } 302 // add the newly created segments to the task queue 303 // after creating them. We skip the first one, as it 304 // is already syncing. 305 // this avoids concurrent access to [t.segments]. 306 for i := 1; i < len(t.segments); i++ { 307 t.sync.segments <- t.segments[i] 308 } 309 t.sync.stats.incTriesSegmented() 310 log.Debug("statesync: trie segmented for parallel sync", "root", t.root, "account", t.account, "segments", len(t.segments)) 311 return nil 312 } 313 314 // trieSegment keeps the state of syncing one segment of a [trieToSync] 315 // struct and keeps a pointer to the [trieToSync] it is syncing. 316 // each trieSegment is accessed by its own goroutine, so locks are not 317 // needed to access its fields 318 type trieSegment struct { 319 start []byte 320 pos []byte 321 end []byte 322 323 trie *trieToSync // points back to the trie the segment belongs to 324 idx int // index of this segment in the trie's segment slice 325 batch ethdb.Batch // batch for writing leafs to 326 leafs uint64 // number of leafs added to the segment 327 } 328 329 func (t *trieSegment) String() string { 330 return fmt.Sprintf( 331 "[%s](%d/%d) (start=%s,end=%s)", 332 t.trie.root, t.idx+1, len(t.trie.segments), 333 common.BytesToHash(t.start).TerminalString(), 334 common.BytesToHash(t.end).TerminalString(), 335 ) 336 } 337 338 // these functions implement the LeafSyncTask interface. 339 func (t *trieSegment) Root() common.Hash { return t.trie.root } 340 func (t *trieSegment) Account() common.Hash { return t.trie.account } 341 func (t *trieSegment) End() []byte { return t.end } 342 func (t *trieSegment) OnStart() (bool, error) { return t.trie.task.OnStart() } 343 func (t *trieSegment) OnFinish() error { return t.trie.segmentFinished(t.idx) } 344 345 func (t *trieSegment) Start() []byte { 346 if t.pos != nil { 347 return t.pos 348 } 349 return t.start 350 } 351 352 func (t *trieSegment) OnLeafs(keys, vals [][]byte) error { 353 // invoke the onLeafs callback 354 if err := t.trie.task.OnLeafs(t.batch, keys, vals); err != nil { 355 return err 356 } 357 // cap the segment's batch 358 if t.batch.ValueSize() > t.trie.sync.batchSize { 359 if err := t.batch.Write(); err != nil { 360 return err 361 } 362 t.batch.Reset() 363 } 364 t.leafs += uint64(len(keys)) 365 if len(keys) > 0 { 366 t.pos = keys[len(keys)-1] // remember the position, used in estimating trie size 367 utils.IncrOne(t.pos) 368 } 369 370 // update eta 371 t.trie.sync.stats.incLeafs(t, uint64(len(keys)), t.estimateSize()) 372 373 if t.trie.root == t.trie.sync.root { 374 return t.trie.createSegmentsIfNeeded(numMainTrieSegments) 375 } else { 376 return t.trie.createSegmentsIfNeeded(numStorageTrieSegments) 377 } 378 } 379 380 // estimateSize calculates an estimate of the number of leafs and returns it, 381 // this assumes the trie has uniform key density. 382 // Note: returns 0 if there has been no progress in syncing the trie. 383 func (t *trieSegment) estimateSize() uint64 { 384 start, pos, end := uint16(0), uint16(0), uint16(0xffff) 385 if len(t.start) > 0 { 386 start = binary.BigEndian.Uint16(t.start) 387 } 388 if len(t.pos) > 0 { 389 pos = binary.BigEndian.Uint16(t.pos) 390 } 391 if len(t.end) > 0 { 392 end = binary.BigEndian.Uint16(t.end) 393 } 394 progress := pos - start 395 if progress == 0 { 396 // this should not occur since estimateSize is called after processing 397 // a batch of leafs, which sets [pos]. 398 // avoid division by 0 out of caution. 399 return 0 400 } 401 left := end - pos 402 return t.leafs * uint64(left) / uint64(progress) 403 } 404 405 // addPadding returns a []byte of length [common.Hash], starting with the BigEndian 406 // representation of [pos], and the rest filled with [padding]. 407 func addPadding(pos uint16, padding byte) []byte { 408 packer := wrappers.Packer{Bytes: make([]byte, common.HashLength)} 409 packer.PackShort(pos) 410 packer.PackFixedBytes(bytes.Repeat([]byte{padding}, common.HashLength-wrappers.ShortLen)) 411 return packer.Bytes 412 }