github.com/dim4egster/coreth@v0.10.2/sync/statesync/trie_segments.go

// (c) 2021-2022, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package statesync

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"sync"

	"github.com/dim4egster/coreth/core/rawdb"
	"github.com/dim4egster/coreth/ethdb"
	"github.com/dim4egster/coreth/plugin/evm/message"
	syncclient "github.com/dim4egster/coreth/sync/client"
	"github.com/dim4egster/coreth/trie"
	"github.com/dim4egster/coreth/utils"
	"github.com/dim4egster/qmallgo/utils/wrappers"
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/log"
)

var (
	_ syncclient.LeafSyncTask = &trieSegment{}
	_ fmt.Stringer            = &trieSegment{}
)

// trieToSync keeps the state of syncing a single trie;
// this can be a storage trie or the main trie.
type trieToSync struct {
	root    common.Hash
	account common.Hash

	// The trie consists of a slice of segments. Each
	// segment has a start and end range of keys, and
	// contains a pointer back to this struct.
	segments []*trieSegment

	// These fields are used to hash the segments in
	// order, even though they may finish syncing out
	// of order or concurrently.
	lock              sync.Mutex
	segmentsDone      map[int]struct{}
	segmentToHashNext int

	// We use a stack trie to hash the leafs and a
	// batch for writing its nodes to disk.
	batch     ethdb.Batch
	stackTrie *trie.StackTrie

	// We keep a pointer to the overall sync operation,
	// used to add segments to the work queue and to
	// update the ETA.
	sync *stateSync

	// task implements the syncTask interface with methods
	// containing logic specific to the main trie or storage
	// tries.
	task       syncTask
	isMainTrie bool
}

// NewTrieToSync initializes a trieToSync and restores any previously started segments.
func NewTrieToSync(sync *stateSync, root common.Hash, account common.Hash, syncTask syncTask) (*trieToSync, error) {
	batch := sync.db.NewBatch()
	trieToSync := &trieToSync{
		sync:         sync,
		root:         root,
		account:      account,
		batch:        batch,
		stackTrie:    trie.NewStackTrie(batch),
		isMainTrie:   (root == sync.root),
		task:         syncTask,
		segmentsDone: make(map[int]struct{}),
	}
	return trieToSync, trieToSync.loadSegments()
}

// loadSegments reads persistent storage and initializes trieSegments that
// had been previously started and need to be resumed.
func (t *trieToSync) loadSegments() error {
	// Get an iterator for segments for t.root and see if we find anything.
	// This lets us check if this trie was previously segmented, in which
	// case we need to restore the same segments on resume.
	it := rawdb.NewSyncSegmentsIterator(t.sync.db, t.root)
	defer it.Release()

	// Track the previously added segment as we loop over persisted values.
	var prevSegmentStart []byte

	for it.Next() {
		// If we find any persisted segments with the specified
		// prefix, we add a new segment to the trie here.
		// The segment we add ends at the key immediately prior
		// to the segment we found on disk. This is because we
		// do not persist the beginning of the first segment.
		_, segmentStart := rawdb.UnpackSyncSegmentKey(it.Key())
		segmentStartPos := binary.BigEndian.Uint16(segmentStart[:wrappers.ShortLen])
		t.addSegment(prevSegmentStart, addPadding(segmentStartPos-1, 0xff))

		// keep tracking the previous segment
		prevSegmentStart = segmentStart
	}
	if err := it.Error(); err != nil {
		return err
	}

	// This creates the last segment if any were found in the loop above
	// and also handles the case where no segments were persisted to disk.
	t.addSegment(prevSegmentStart, nil)

	for _, segment := range t.segments {
		// For each segment, find the last key already persisted
		// so syncing can begin at the subsequent key.
		var lastKey []byte
		it := segment.trie.task.IterateLeafs(common.BytesToHash(segment.start))
		defer it.Release()
		for it.Next() {
			if len(segment.end) > 0 && bytes.Compare(it.Key(), segment.end) > 0 {
				// don't go past the end of the segment
				break
			}
			lastKey = common.CopyBytes(it.Key())
			segment.leafs++
		}
		// Surface any error from the leaf iterator before resuming from [lastKey].
		if err := it.Error(); err != nil {
			return err
		}
		if lastKey != nil {
			utils.IncrOne(lastKey)
			segment.pos = lastKey // syncing will start from this key
		}
		log.Debug("statesync: loading segment", "segment", segment)
	}
	return nil
}
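
// The sketch below is illustrative only (not part of the original file). It
// shows how the boundaries restored by loadSegments line up: each persisted
// segment start implies that the previous segment ends at the key
// immediately before it, which addPadding reconstructs.
func exampleRestoredBounds() {
	// Hypothetical resume: starts with first two bytes 0x4000 and 0x8000
	// were persisted, implying three segments.
	seg0End := addPadding(0x4000-1, 0xff) // 0x3fff followed by 0xff padding
	seg1End := addPadding(0x8000-1, 0xff) // 0x7fff followed by 0xff padding
	// segment 0: start=nil (keyspace start), end=seg0End
	// segment 1: start=0x4000...,            end=seg1End
	// segment 2: start=0x8000...,            end=nil (keyspace end)
	fmt.Printf("seg0End=%x seg1End=%x\n", seg0End, seg1End)
}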

// startSyncing adds the trieToSync's segments to the work queue.
func (t *trieToSync) startSyncing() {
	for _, segment := range t.segments {
		t.sync.segments <- segment // this will queue the segment for syncing
	}
}

// addSegment appends a newly created segment specified by [start] and
// [end] to [t.segments] and returns it.
// Note: addSegment does not take a lock and therefore is called only
// before multiple segments are syncing concurrently.
func (t *trieToSync) addSegment(start, end []byte) *trieSegment {
	segment := &trieSegment{
		start: start,
		end:   end,
		trie:  t,
		idx:   len(t.segments),
		batch: t.sync.db.NewBatch(),
	}
	t.segments = append(t.segments, segment)
	return segment
}

// segmentFinished is called when the trie segment with index [idx] finishes syncing.
// It creates intermediate hash nodes for the trie up to the last contiguous segment
// received from the start.
func (t *trieToSync) segmentFinished(idx int) error {
	t.lock.Lock()
	defer t.lock.Unlock()

	log.Debug("statesync: segment finished", "segment", t.segments[idx])
	t.segmentsDone[idx] = struct{}{}
	for {
		if _, ok := t.segmentsDone[t.segmentToHashNext]; !ok {
			// stop if the next contiguous segment from the beginning
			// of the trie has not finished syncing yet.
			break
		}
		segment := t.segments[t.segmentToHashNext]

		// persist any items in the batch as they will be iterated below.
		if err := segment.batch.Write(); err != nil {
			return err
		}
		segment.batch.Reset() // reset the batch to free memory, even though it will not be used again

		// iterate all the items from the start of the segment (end is checked in the loop)
		it := t.task.IterateLeafs(common.BytesToHash(segment.start))
		defer it.Release()

		for it.Next() {
			if len(segment.end) > 0 && bytes.Compare(it.Key(), segment.end) > 0 {
				// don't go past the end of the segment (data belongs to the next segment)
				break
			}
			// update the stack trie and cap the batch it writes to.
			value := common.CopyBytes(it.Value())
			if err := t.stackTrie.TryUpdate(it.Key(), value); err != nil {
				return err
			}
			if t.batch.ValueSize() > t.sync.batchSize {
				if err := t.batch.Write(); err != nil {
					return err
				}
				t.batch.Reset()
			}
		}
		if err := it.Error(); err != nil {
			return err
		}
		t.segmentToHashNext++
	}
	if t.segmentToHashNext < len(t.segments) {
		// trie not complete
		return nil
	}

	// When the trie is finished, this hashes any remaining nodes in the
	// stack trie and computes the root.
	actualRoot, err := t.stackTrie.Commit()
	if err != nil {
		return err
	}
	if actualRoot != t.root {
		return fmt.Errorf("unexpected root, expected=%s, actual=%s, account=%s", t.root, actualRoot, t.account)
	}
	if !t.isMainTrie {
		// the batch containing the main trie's root will be committed on
		// sync completion.
		if err := t.batch.Write(); err != nil {
			return err
		}
	}

	// remove all segments for this root from persistent storage
	if err := rawdb.ClearSyncSegments(t.sync.db, t.root); err != nil {
		return err
	}
	return t.task.OnFinish()
}
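
// A minimal standalone sketch (illustrative, not part of the original file)
// of the ordering pattern segmentFinished relies on: completions may arrive
// in any order, but work is applied strictly in index order by tracking the
// next index to process against a set of finished indices.
func exampleProcessInOrder(done map[int]struct{}, next *int, process func(int)) {
	for {
		if _, ok := done[*next]; !ok {
			// The next contiguous index has not finished yet; stop and
			// let a later completion resume from here.
			return
		}
		process(*next)
		*next++
	}
}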

// createSegmentsIfNeeded is called from the leaf handler. In case the trie being synced
// only has one segment but a large number of leafs ([t.estimateSize() >= segmentThreshold]),
// it will create [numSegments-1] additional segments to sync the trie.
func (t *trieToSync) createSegmentsIfNeeded(numSegments int) error {
	if !t.shouldSegment() {
		return nil
	}

	return t.createSegments(numSegments)
}

// shouldSegment returns true if a trie should be separated into segments.
func (t *trieToSync) shouldSegment() bool {
	t.lock.Lock()
	defer t.lock.Unlock()

	// Return false if the trie has already been segmented.
	if len(t.segments) > 1 {
		return false
	}

	// Return true iff the estimated size of the trie is at least [segmentThreshold].
	// Note: at this point there is only a single segment (loadSegments guarantees there
	// is at least one segment).
	segment := t.segments[0]
	return segment.estimateSize() >= uint64(segmentThreshold)
}

// createSegments divides the key space into [numSegments] consecutive segments.
// We use the first 2 bytes of the keys to build the ranges and fill the rest
// with ones or zeroes accordingly; [segmentStep] is the step between the first
// 2 bytes of the start keys of consecutive segments.
// createSegments should only be called once, while a single thread is
// accessing this trie, so there is no need to hold a lock.
func (t *trieToSync) createSegments(numSegments int) error {
	segment := t.segments[0]

	segmentStep := 0x10000 / numSegments

	for i := 0; i < numSegments; i++ {
		start := uint16(i * segmentStep)
		end := uint16(i*segmentStep + (segmentStep - 1))

		startBytes := addPadding(start, 0x00)
		endBytes := addPadding(end, 0xff)

		// Skip any portion of the trie that has already been synced.
		if bytes.Compare(segment.pos, endBytes) >= 0 {
			continue
		}

		// since the first segment is already syncing,
		// it does not need to be added to the task queue.
		// instead, we update its end and move on to creating
		// the next segment.
		if segment.end == nil {
			segment.end = endBytes
			continue
		}

		// create the segment and persist its start key
		segment := t.addSegment(startBytes, endBytes)
		if err := rawdb.WriteSyncSegment(t.sync.db, t.root, segment.start); err != nil {
			return err
		}
	}
	// Add the newly created segments to the task queue after
	// creating them all; we skip the first one, as it is
	// already syncing. This avoids concurrent access to [t.segments].
	for i := 1; i < len(t.segments); i++ {
		t.sync.segments <- t.segments[i]
	}
	t.sync.stats.incTriesSegmented()
	log.Debug("statesync: trie segmented for parallel sync", "root", t.root, "account", t.account, "segments", len(t.segments))
	return nil
}
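
// A worked example (illustrative only, not part of the original file) of the
// range math in createSegments, assuming numSegments = 4:
// segmentStep = 0x10000/4 = 0x4000, giving the first-two-byte ranges
// [0x0000,0x3fff], [0x4000,0x7fff], [0x8000,0xbfff], [0xc000,0xffff],
// each padded out to 32 bytes with 0x00 (starts) and 0xff (ends).
func exampleKeySpaceDivision() {
	numSegments := 4
	segmentStep := 0x10000 / numSegments // 0x4000
	for i := 0; i < numSegments; i++ {
		start := uint16(i * segmentStep)
		end := uint16(i*segmentStep + (segmentStep - 1))
		fmt.Printf("segment %d: start=%x end=%x\n", i, addPadding(start, 0x00), addPadding(end, 0xff))
	}
}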

// trieSegment keeps the state of syncing one segment of a [trieToSync]
// struct and keeps a pointer to the [trieToSync] it is syncing.
// Each trieSegment is accessed by its own goroutine, so locks are not
// needed to access its fields.
type trieSegment struct {
	start []byte
	pos   []byte
	end   []byte

	trie  *trieToSync // points back to the trie the segment belongs to
	idx   int         // index of this segment in the trie's segment slice
	batch ethdb.Batch // batch for writing leafs to
	leafs uint64      // number of leafs added to the segment
}

func (t *trieSegment) String() string {
	return fmt.Sprintf(
		"[%s](%d/%d) (start=%s,end=%s)",
		t.trie.root, t.idx+1, len(t.trie.segments),
		common.BytesToHash(t.start).TerminalString(),
		common.BytesToHash(t.end).TerminalString(),
	)
}

// These functions implement the LeafSyncTask interface.
func (t *trieSegment) Root() common.Hash          { return t.trie.root }
func (t *trieSegment) Account() common.Hash       { return t.trie.account }
func (t *trieSegment) End() []byte                { return t.end }
func (t *trieSegment) NodeType() message.NodeType { return message.StateTrieNode }
func (t *trieSegment) OnStart() (bool, error)     { return t.trie.task.OnStart() }
func (t *trieSegment) OnFinish() error            { return t.trie.segmentFinished(t.idx) }

func (t *trieSegment) Start() []byte {
	if t.pos != nil {
		return t.pos
	}
	return t.start
}

func (t *trieSegment) OnLeafs(keys, vals [][]byte) error {
	// invoke the onLeafs callback
	if err := t.trie.task.OnLeafs(t.batch, keys, vals); err != nil {
		return err
	}
	// cap the segment's batch
	if t.batch.ValueSize() > t.trie.sync.batchSize {
		if err := t.batch.Write(); err != nil {
			return err
		}
		t.batch.Reset()
	}
	t.leafs += uint64(len(keys))
	if len(keys) > 0 {
		t.pos = keys[len(keys)-1] // remember the position, used in estimating trie size
		utils.IncrOne(t.pos)
	}

	// update the ETA
	t.trie.sync.stats.incLeafs(t, uint64(len(keys)), t.estimateSize())

	if t.trie.root == t.trie.sync.root {
		return t.trie.createSegmentsIfNeeded(numMainTrieSegments)
	} else {
		return t.trie.createSegmentsIfNeeded(numStorageTrieSegments)
	}
}
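
// A small sketch (illustrative only) of why OnLeafs calls utils.IncrOne on
// the last received key: [t.pos] must point at the first key that has not
// been received yet, so the byte slice is incremented like a big-endian
// counter, carrying across bytes, to reach the subsequent key.
func examplePosAdvance() {
	pos := []byte{0x12, 0xff, 0xff}
	utils.IncrOne(pos)
	fmt.Printf("%x\n", pos) // expected: 130000 (the carry propagates left)
}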

// estimateSize returns an estimate of the number of leafs left to sync in
// this segment; this assumes the trie has uniform key density.
// Note: returns 0 if there has been no progress in syncing the segment.
func (t *trieSegment) estimateSize() uint64 {
	start, pos, end := uint16(0), uint16(0), uint16(0xffff)
	if len(t.start) > 0 {
		start = binary.BigEndian.Uint16(t.start)
	}
	if len(t.pos) > 0 {
		pos = binary.BigEndian.Uint16(t.pos)
	}
	if len(t.end) > 0 {
		end = binary.BigEndian.Uint16(t.end)
	}
	progress := pos - start
	if progress == 0 {
		// this should not occur since estimateSize is called after processing
		// a batch of leafs, which sets [pos].
		// avoid division by 0 out of caution.
		return 0
	}
	left := end - pos
	return t.leafs * uint64(left) / uint64(progress)
}
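
// A worked example (illustrative only) of the proportional estimate above:
// if a segment started at 0x0000, has synced up to pos=0x1000, and has
// received 500 leafs, then with end=0xffff the estimated number of leafs
// remaining is 500 * (0xffff-0x1000) / (0x1000-0x0000) = 500*61439/4096 ≈ 7499.
func exampleEstimate() {
	var leafs uint64 = 500
	start, pos, end := uint16(0x0000), uint16(0x1000), uint16(0xffff)
	progress, left := uint64(pos-start), uint64(end-pos)
	fmt.Println(leafs * left / progress) // prints 7499
}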

// addPadding returns a []byte of length [common.HashLength], starting with the
// BigEndian representation of [pos], with the remaining bytes set to [padding].
func addPadding(pos uint16, padding byte) []byte {
	packer := wrappers.Packer{Bytes: make([]byte, common.HashLength)}
	packer.PackShort(pos)
	packer.PackFixedBytes(bytes.Repeat([]byte{padding}, common.HashLength-wrappers.ShortLen))
	return packer.Bytes
}
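
// Example (illustrative only): addPadding(0x1234, 0xff) yields a 32-byte key
// beginning with the big-endian encoding of the position (0x12, 0x34)
// followed by thirty 0xff bytes.
func exampleAddPadding() {
	fmt.Printf("%x\n", addPadding(0x1234, 0xff))
}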