github.com/dim4egster/coreth@v0.10.2/sync/statesync/trie_segments.go

// (c) 2021-2022, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package statesync

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"sync"

	"github.com/dim4egster/coreth/core/rawdb"
	"github.com/dim4egster/coreth/ethdb"
	"github.com/dim4egster/coreth/plugin/evm/message"
	syncclient "github.com/dim4egster/coreth/sync/client"
	"github.com/dim4egster/coreth/trie"
	"github.com/dim4egster/coreth/utils"
	"github.com/dim4egster/qmallgo/utils/wrappers"
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/log"
)

var (
	_ syncclient.LeafSyncTask = &trieSegment{}
	_ fmt.Stringer            = &trieSegment{}
)

// trieToSync keeps the state of a single trie syncing.
// This can be a storage trie or the main trie.
type trieToSync struct {
	root    common.Hash
	account common.Hash

	// The trie consists of a slice of segments. Each
	// segment has a start and end range of keys, and
	// contains a pointer back to this struct.
	segments []*trieSegment

	// These fields are used to hash the segments in
	// order, even though they may finish syncing out
	// of order or concurrently.
	lock              sync.Mutex
	segmentsDone      map[int]struct{}
	segmentToHashNext int

	// We use a stack trie to hash the leafs and have
	// a batch used for writing it to disk.
	batch     ethdb.Batch
	stackTrie *trie.StackTrie

	// We keep a pointer to the overall sync operation,
	// used to add segments to the work queue and to
	// update the ETA.
	sync *stateSync

	// task implements the syncTask interface with methods
	// containing logic specific to the main trie or storage
	// tries.
	task       syncTask
	isMainTrie bool
}

// NewTrieToSync initializes a trieToSync and restores any previously started segments.
func NewTrieToSync(sync *stateSync, root common.Hash, account common.Hash, syncTask syncTask) (*trieToSync, error) {
	batch := sync.db.NewBatch()
	trieToSync := &trieToSync{
		sync:         sync,
		root:         root,
		account:      account,
		batch:        batch,
		stackTrie:    trie.NewStackTrie(batch),
		isMainTrie:   (root == sync.root),
		task:         syncTask,
		segmentsDone: make(map[int]struct{}),
	}
	return trieToSync, trieToSync.loadSegments()
}
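
// Usage sketch (illustrative only): [s] is assumed to be an initialized
// *stateSync, and [newMainTrieTask] is a hypothetical constructor for the
// caller's syncTask implementation.
//
//	mainTrie, err := NewTrieToSync(s, s.root, common.Hash{}, newMainTrieTask(s))
//	if err != nil {
//		return err
//	}
//	mainTrie.startSyncing() // queue every segment onto s.segments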

// loadSegments reads persistent storage and initializes trieSegments that
// had been previously started and need to be resumed.
func (t *trieToSync) loadSegments() error {
	// Get an iterator for segments for t.root and see if we find anything.
	// This lets us check if this trie was previously segmented, in which
	// case we need to restore the same segments on resume.
	it := rawdb.NewSyncSegmentsIterator(t.sync.db, t.root)
	defer it.Release()

	// Track the previously added segment as we loop over persisted values.
	var prevSegmentStart []byte

	for it.Next() {
		// If we find any persisted segments with the specified
		// prefix, we add a new segment to the trie here.
		// The segment we add represents a segment ending at the
		// key immediately prior to the segment we found on disk.
		// This is because we do not persist the beginning of
		// the first segment.
		_, segmentStart := rawdb.UnpackSyncSegmentKey(it.Key())
		segmentStartPos := binary.BigEndian.Uint16(segmentStart[:wrappers.ShortLen])
		t.addSegment(prevSegmentStart, addPadding(segmentStartPos-1, 0xff))

		// keep tracking the previous segment
		prevSegmentStart = segmentStart
	}
	if err := it.Error(); err != nil {
		return err
	}

	// this creates the last segment if any were found in the loop
	// and also handles the case where there were no segments persisted to disk.
	t.addSegment(prevSegmentStart, nil)

	for _, segment := range t.segments {
		// for each segment we need to find the last key already persisted
		// so syncing can begin at the subsequent key
		var lastKey []byte
		it := segment.trie.task.IterateLeafs(common.BytesToHash(segment.start))
		defer it.Release()
		for it.Next() {
			if len(segment.end) > 0 && bytes.Compare(it.Key(), segment.end) > 0 {
				// don't go past the end of the segment
				break
			}
			lastKey = common.CopyBytes(it.Key())
			segment.leafs++
		}
		if lastKey != nil {
			utils.IncrOne(lastKey)
			segment.pos = lastKey // syncing will start from this key
		}
		log.Debug("statesync: loading segment", "segment", segment)
	}
	return it.Error()
}
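
// To illustrate the resume logic in loadSegments (assumed example values):
// if segments starting at 0x4000... and 0x8000... were previously persisted
// for this root, the loop above recreates segments (nil, 0x3fff...ff) and
// (0x4000..., 0x7fff...ff), and the final addSegment call adds
// (0x8000..., nil). Each segment then iterates its already-persisted leafs
// to set [pos], so syncing resumes at the first key it has not yet received.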

// startSyncing adds the trieToSync's segments to the work queue
func (t *trieToSync) startSyncing() {
	for _, segment := range t.segments {
		t.sync.segments <- segment // this will queue the segment for syncing
	}
}

// addSegment appends a newly created segment specified by [start] and
// [end] to [t.segments] and returns it.
// note: addSegment does not take a lock and therefore is called only
// before multiple segments are syncing concurrently.
func (t *trieToSync) addSegment(start, end []byte) *trieSegment {
	segment := &trieSegment{
		start: start,
		end:   end,
		trie:  t,
		idx:   len(t.segments),
		batch: t.sync.db.NewBatch(),
	}
	t.segments = append(t.segments, segment)
	return segment
}

// segmentFinished is called when the trie segment with index [idx] finishes syncing.
// It creates intermediary hash nodes for the trie up to the last contiguous segment received from start.
func (t *trieToSync) segmentFinished(idx int) error {
	t.lock.Lock()
	defer t.lock.Unlock()

	log.Debug("statesync: segment finished", "segment", t.segments[idx])
	t.segmentsDone[idx] = struct{}{}
	for {
		if _, ok := t.segmentsDone[t.segmentToHashNext]; !ok {
			// if not the next contiguous segment from the beginning of the trie,
			// don't do anything.
			break
		}
		segment := t.segments[t.segmentToHashNext]

		// persist any items in the batch as they will be iterated below.
		if err := segment.batch.Write(); err != nil {
			return err
		}
		segment.batch.Reset() // reset the batch to free memory (even though it is no longer used)

		// iterate all the items from the start of the segment (end is checked in the loop)
		it := t.task.IterateLeafs(common.BytesToHash(segment.start))
		defer it.Release()

		for it.Next() {
			if len(segment.end) > 0 && bytes.Compare(it.Key(), segment.end) > 0 {
				// don't go past the end of the segment (data belongs to the next segment)
				break
			}
			// update the stack trie and cap the batch it writes to.
			value := common.CopyBytes(it.Value())
			if err := t.stackTrie.TryUpdate(it.Key(), value); err != nil {
				return err
			}
			if t.batch.ValueSize() > t.sync.batchSize {
				if err := t.batch.Write(); err != nil {
					return err
				}
				t.batch.Reset()
			}
		}
		if err := it.Error(); err != nil {
			return err
		}
		t.segmentToHashNext++
	}
	if t.segmentToHashNext < len(t.segments) {
		// trie not complete
		return nil
	}

	// when the trie is finished, this hashes any remaining nodes in the stack
	// trie and creates the root
	actualRoot, err := t.stackTrie.Commit()
	if err != nil {
		return err
	}
	if actualRoot != t.root {
		return fmt.Errorf("unexpected root, expected=%s, actual=%s, account=%s", t.root, actualRoot, t.account)
	}
	if !t.isMainTrie {
		// the batch containing the main trie's root will be committed on
		// sync completion.
		if err := t.batch.Write(); err != nil {
			return err
		}
	}

	// remove all segments for this root from persistent storage
	if err := rawdb.ClearSyncSegments(t.sync.db, t.root); err != nil {
		return err
	}
	return t.task.OnFinish()
}
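
// Hashing-order example for segmentFinished (illustrative): with segments
// 0, 1 and 2 finishing in the order 2, 0, 1, the call for segment 2 only
// records it as done (segment 0 is still pending), the call for segment 0
// hashes segment 0, and the call for segment 1 hashes segments 1 and 2;
// [segmentToHashNext] then equals len(t.segments), so the stack trie is
// committed and the resulting root is checked against [t.root].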

// createSegmentsIfNeeded is called from the leaf handler. In case the trie syncing only has
// one segment but a large number of leafs ([segment.estimateSize() >= segmentThreshold]), it will
// create [numSegments-1] additional segments to sync the trie.
func (t *trieToSync) createSegmentsIfNeeded(numSegments int) error {
	if !t.shouldSegment() {
		return nil
	}

	return t.createSegments(numSegments)
}

// shouldSegment returns true if a trie should be separated into segments.
func (t *trieToSync) shouldSegment() bool {
	t.lock.Lock()
	defer t.lock.Unlock()

	// Return false if the trie has already been segmented.
	if len(t.segments) > 1 {
		return false
	}

	// Return true iff the estimated size of the trie exceeds [segmentThreshold].
	// Note: at this point there is only a single segment (loadSegments guarantees there
	// is at least one segment).
	segment := t.segments[0]
	return segment.estimateSize() >= uint64(segmentThreshold)
}

// createSegments divides the key space into [numSegments] consecutive segments.
// We use 2 bytes to build the ranges and fill the rest with ones or zeroes
// accordingly; [segmentStep] is the step between the first 2 bytes of the
// start keys of consecutive segments.
// createSegments should only be called once, while there is only one
// thread accessing this trie, such that there is no need to hold a lock.
func (t *trieToSync) createSegments(numSegments int) error {
	segment := t.segments[0]

	segmentStep := 0x10000 / numSegments

	for i := 0; i < numSegments; i++ {
		start := uint16(i * segmentStep)
		end := uint16(i*segmentStep + (segmentStep - 1))

		startBytes := addPadding(start, 0x00)
		endBytes := addPadding(end, 0xff)

		// Skip any portion of the trie that has already been synced.
		if bytes.Compare(segment.pos, endBytes) >= 0 {
			continue
		}

		// Since the first segment is already syncing,
		// it does not need to be added to the task queue.
		// Instead, we update its end and move on to creating
		// the next segment.
		if segment.end == nil {
			segment.end = endBytes
			continue
		}

		// create the segment
		segment := t.addSegment(startBytes, endBytes)
		if err := rawdb.WriteSyncSegment(t.sync.db, t.root, segment.start); err != nil {
			return err
		}
	}
	// add the newly created segments to the task queue
	// after creating them. We skip the first one, as it
	// is already syncing.
	// This avoids concurrent access to [t.segments].
	for i := 1; i < len(t.segments); i++ {
		t.sync.segments <- t.segments[i]
	}
	t.sync.stats.incTriesSegmented()
	log.Debug("statesync: trie segmented for parallel sync", "root", t.root, "account", t.account, "segments", len(t.segments))
	return nil
}
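
// Worked example for createSegments (illustrative): with numSegments = 4,
// segmentStep = 0x10000/4 = 0x4000, giving 2-byte ranges [0x0000, 0x3fff],
// [0x4000, 0x7fff], [0x8000, 0xbfff] and [0xc000, 0xffff]. addPadding
// extends these to 32-byte keys, so the first segment covers 0x0000...00
// through 0x3fff...ff, and the last 0xc000...00 through 0xffff...ff.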

// trieSegment keeps the state of syncing one segment of a [trieToSync]
// struct and keeps a pointer to the [trieToSync] it is syncing.
// Each trieSegment is accessed by its own goroutine, so locks are not
// needed to access its fields.
type trieSegment struct {
	start []byte
	pos   []byte
	end   []byte

	trie  *trieToSync // points back to the trie the segment belongs to
	idx   int         // index of this segment in the trie's segment slice
	batch ethdb.Batch // batch for writing leafs to
	leafs uint64      // number of leafs added to the segment
}

func (t *trieSegment) String() string {
	return fmt.Sprintf(
		"[%s](%d/%d) (start=%s,end=%s)",
		t.trie.root, t.idx+1, len(t.trie.segments),
		common.BytesToHash(t.start).TerminalString(),
		common.BytesToHash(t.end).TerminalString(),
	)
}

// these functions implement the LeafSyncTask interface.
func (t *trieSegment) Root() common.Hash          { return t.trie.root }
func (t *trieSegment) Account() common.Hash       { return t.trie.account }
func (t *trieSegment) End() []byte                { return t.end }
func (t *trieSegment) NodeType() message.NodeType { return message.StateTrieNode }
func (t *trieSegment) OnStart() (bool, error)     { return t.trie.task.OnStart() }
func (t *trieSegment) OnFinish() error            { return t.trie.segmentFinished(t.idx) }

func (t *trieSegment) Start() []byte {
	if t.pos != nil {
		return t.pos
	}
	return t.start
}

func (t *trieSegment) OnLeafs(keys, vals [][]byte) error {
	// invoke the onLeafs callback
	if err := t.trie.task.OnLeafs(t.batch, keys, vals); err != nil {
		return err
	}
	// cap the segment's batch
	if t.batch.ValueSize() > t.trie.sync.batchSize {
		if err := t.batch.Write(); err != nil {
			return err
		}
		t.batch.Reset()
	}
	t.leafs += uint64(len(keys))
	if len(keys) > 0 {
		t.pos = keys[len(keys)-1] // remember the position, used in estimating trie size
		utils.IncrOne(t.pos)
	}

	// update the ETA
	t.trie.sync.stats.incLeafs(t, uint64(len(keys)), t.estimateSize())

	if t.trie.root == t.trie.sync.root {
		return t.trie.createSegmentsIfNeeded(numMainTrieSegments)
	} else {
		return t.trie.createSegmentsIfNeeded(numStorageTrieSegments)
	}
}

// estimateSize estimates the number of leafs left to sync in this segment
// and returns it. This assumes the trie has uniform key density.
// Note: returns 0 if there has been no progress in syncing the trie.
func (t *trieSegment) estimateSize() uint64 {
	start, pos, end := uint16(0), uint16(0), uint16(0xffff)
	if len(t.start) > 0 {
		start = binary.BigEndian.Uint16(t.start)
	}
	if len(t.pos) > 0 {
		pos = binary.BigEndian.Uint16(t.pos)
	}
	if len(t.end) > 0 {
		end = binary.BigEndian.Uint16(t.end)
	}
	progress := pos - start
	if progress == 0 {
		// this should not occur since estimateSize is called after processing
		// a batch of leafs, which sets [pos].
		// avoid division by 0 out of caution.
		return 0
	}
	left := end - pos
	return t.leafs * uint64(left) / uint64(progress)
}

// addPadding returns a []byte of length [common.HashLength], starting with the
// BigEndian representation of [pos], and the rest filled with [padding].
func addPadding(pos uint16, padding byte) []byte {
	packer := wrappers.Packer{Bytes: make([]byte, common.HashLength)}
	packer.PackShort(pos)
	packer.PackFixedBytes(bytes.Repeat([]byte{padding}, common.HashLength-wrappers.ShortLen))
	return packer.Bytes
}
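
// Worked examples for the two functions above (values are illustrative):
//
// addPadding(0x1234, 0xff) packs the 2-byte big-endian position and pads the
// remaining 30 bytes, returning the 32-byte key 0x1234ffff...ff.
//
// estimateSize extrapolates linearly over the same 2-byte prefixes: a segment
// spanning [0x0000, 0xffff] that has synced 1000 leafs up to pos = 0x4000
// estimates roughly 1000*(0xffff-0x4000)/(0x4000-0x0000) ≈ 3000 leafs left,
// under the uniform key density assumption noted above.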