github.com/theQRL/go-zond@v0.1.1/trie/sync.go (about)

     1  // Copyright 2015 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package trie
    18  
    19  import (
    20  	"errors"
    21  	"fmt"
    22  	"sync"
    23  
    24  	"github.com/theQRL/go-zond/common"
    25  	"github.com/theQRL/go-zond/common/prque"
    26  	"github.com/theQRL/go-zond/core/rawdb"
    27  	"github.com/theQRL/go-zond/core/types"
    28  	"github.com/theQRL/go-zond/zonddb"
    29  	"github.com/theQRL/go-zond/log"
    30  )
    31  
    32  // ErrNotRequested is returned by the trie sync when it's requested to process a
    33  // node it did not request.
    34  var ErrNotRequested = errors.New("not requested")
    35  
    36  // ErrAlreadyProcessed is returned by the trie sync when it's requested to process a
    37  // node it already processed previously.
    38  var ErrAlreadyProcessed = errors.New("already processed")
    39  
    40  // maxFetchesPerDepth is the maximum number of pending trie nodes per depth. The
    41  // role of this value is to limit the number of trie nodes that get expanded in
    42  // memory if the node was configured with a significant number of peers.
    43  const maxFetchesPerDepth = 16384
    44  
    45  // SyncPath is a path tuple identifying a particular trie node either in a single
    46  // trie (account) or a layered trie (account -> storage).
    47  //
    48  // Content wise the tuple either has 1 element if it addresses a node in a single
    49  // trie or 2 elements if it addresses a node in a stacked trie.
    50  //
    51  // To support aiming arbitrary trie nodes, the path needs to support odd nibble
    52  // lengths. To avoid transferring expanded hex form over the network, the last
    53  // part of the tuple (which needs to index into the middle of a trie) is compact
    54  // encoded. In case of a 2-tuple, the first item is always 32 bytes so that is
    55  // simple binary encoded.
    56  //
    57  // Examples:
    58  //   - Path 0x9  -> {0x19}
    59  //   - Path 0x99 -> {0x0099}
    60  //   - Path 0x01234567890123456789012345678901012345678901234567890123456789019  -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x19}
    61  //   - Path 0x012345678901234567890123456789010123456789012345678901234567890199 -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x0099}
    62  type SyncPath [][]byte
    63  
    64  // NewSyncPath converts an expanded trie path from nibble form into a compact
    65  // version that can be sent over the network.
    66  func NewSyncPath(path []byte) SyncPath {
    67  	// If the hash is from the account trie, append a single item, if it
    68  	// is from a storage trie, append a tuple. Note, the length 64 is
    69  	// clashing between account leaf and storage root. It's fine though
    70  	// because having a trie node at 64 depth means a hash collision was
    71  	// found and we're long dead.
    72  	if len(path) < 64 {
    73  		return SyncPath{hexToCompact(path)}
    74  	}
    75  	return SyncPath{hexToKeybytes(path[:64]), hexToCompact(path[64:])}
    76  }
    77  
    78  // LeafCallback is a callback type invoked when a trie operation reaches a leaf
    79  // node.
    80  //
    81  // The keys is a path tuple identifying a particular trie node either in a single
    82  // trie (account) or a layered trie (account -> storage). Each key in the tuple
    83  // is in the raw format(32 bytes).
    84  //
    85  // The path is a composite hexary path identifying the trie node. All the key
    86  // bytes are converted to the hexary nibbles and composited with the parent path
    87  // if the trie node is in a layered trie.
    88  //
    89  // It's used by state sync and commit to allow handling external references
    90  // between account and storage tries. And also it's used in the state healing
    91  // for extracting the raw states(leaf nodes) with corresponding paths.
    92  type LeafCallback func(keys [][]byte, path []byte, leaf []byte, parent common.Hash, parentPath []byte) error
    93  
    94  // nodeRequest represents a scheduled or already in-flight trie node retrieval request.
    95  type nodeRequest struct {
    96  	hash common.Hash // Hash of the trie node to retrieve
    97  	path []byte      // Merkle path leading to this node for prioritization
    98  	data []byte      // Data content of the node, cached until all subtrees complete
    99  
   100  	parent   *nodeRequest // Parent state node referencing this entry
   101  	deps     int          // Number of dependencies before allowed to commit this node
   102  	callback LeafCallback // Callback to invoke if a leaf node it reached on this branch
   103  }
   104  
   105  // codeRequest represents a scheduled or already in-flight bytecode retrieval request.
   106  type codeRequest struct {
   107  	hash    common.Hash    // Hash of the contract bytecode to retrieve
   108  	path    []byte         // Merkle path leading to this node for prioritization
   109  	data    []byte         // Data content of the node, cached until all subtrees complete
   110  	parents []*nodeRequest // Parent state nodes referencing this entry (notify all upon completion)
   111  }
   112  
   113  // NodeSyncResult is a response with requested trie node along with its node path.
   114  type NodeSyncResult struct {
   115  	Path string // Path of the originally unknown trie node
   116  	Data []byte // Data content of the retrieved trie node
   117  }
   118  
   119  // CodeSyncResult is a response with requested bytecode along with its hash.
   120  type CodeSyncResult struct {
   121  	Hash common.Hash // Hash the originally unknown bytecode
   122  	Data []byte      // Data content of the retrieved bytecode
   123  }
   124  
   125  // syncMemBatch is an in-memory buffer of successfully downloaded but not yet
   126  // persisted data items.
   127  type syncMemBatch struct {
   128  	nodes  map[string][]byte      // In-memory membatch of recently completed nodes
   129  	hashes map[string]common.Hash // Hashes of recently completed nodes
   130  	codes  map[common.Hash][]byte // In-memory membatch of recently completed codes
   131  	size   uint64                 // Estimated batch-size of in-memory data.
   132  }
   133  
   134  // newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes.
   135  func newSyncMemBatch() *syncMemBatch {
   136  	return &syncMemBatch{
   137  		nodes:  make(map[string][]byte),
   138  		hashes: make(map[string]common.Hash),
   139  		codes:  make(map[common.Hash][]byte),
   140  	}
   141  }
   142  
   143  // hasNode reports the trie node with specific path is already cached.
   144  func (batch *syncMemBatch) hasNode(path []byte) bool {
   145  	_, ok := batch.nodes[string(path)]
   146  	return ok
   147  }
   148  
   149  // hasCode reports the contract code with specific hash is already cached.
   150  func (batch *syncMemBatch) hasCode(hash common.Hash) bool {
   151  	_, ok := batch.codes[hash]
   152  	return ok
   153  }
   154  
   155  // Sync is the main state trie synchronisation scheduler, which provides yet
   156  // unknown trie hashes to retrieve, accepts node data associated with said hashes
   157  // and reconstructs the trie step by step until all is done.
   158  type Sync struct {
   159  	scheme   string                       // Node scheme descriptor used in database.
   160  	database zonddb.KeyValueReader         // Persistent database to check for existing entries
   161  	membatch *syncMemBatch                // Memory buffer to avoid frequent database writes
   162  	nodeReqs map[string]*nodeRequest      // Pending requests pertaining to a trie node path
   163  	codeReqs map[common.Hash]*codeRequest // Pending requests pertaining to a code hash
   164  	queue    *prque.Prque[int64, any]     // Priority queue with the pending requests
   165  	fetches  map[int]int                  // Number of active fetches per trie node depth
   166  }
   167  
   168  // NewSync creates a new trie data download scheduler.
   169  func NewSync(root common.Hash, database zonddb.KeyValueReader, callback LeafCallback, scheme string) *Sync {
   170  	ts := &Sync{
   171  		scheme:   scheme,
   172  		database: database,
   173  		membatch: newSyncMemBatch(),
   174  		nodeReqs: make(map[string]*nodeRequest),
   175  		codeReqs: make(map[common.Hash]*codeRequest),
   176  		queue:    prque.New[int64, any](nil), // Ugh, can contain both string and hash, whyyy
   177  		fetches:  make(map[int]int),
   178  	}
   179  	ts.AddSubTrie(root, nil, common.Hash{}, nil, callback)
   180  	return ts
   181  }
   182  
   183  // AddSubTrie registers a new trie to the sync code, rooted at the designated
   184  // parent for completion tracking. The given path is a unique node path in
   185  // hex format and contain all the parent path if it's layered trie node.
   186  func (s *Sync) AddSubTrie(root common.Hash, path []byte, parent common.Hash, parentPath []byte, callback LeafCallback) {
   187  	// Short circuit if the trie is empty or already known
   188  	if root == types.EmptyRootHash {
   189  		return
   190  	}
   191  	if s.membatch.hasNode(path) {
   192  		return
   193  	}
   194  	owner, inner := ResolvePath(path)
   195  	if rawdb.HasTrieNode(s.database, owner, inner, root, s.scheme) {
   196  		return
   197  	}
   198  	// Assemble the new sub-trie sync request
   199  	req := &nodeRequest{
   200  		hash:     root,
   201  		path:     path,
   202  		callback: callback,
   203  	}
   204  	// If this sub-trie has a designated parent, link them together
   205  	if parent != (common.Hash{}) {
   206  		ancestor := s.nodeReqs[string(parentPath)]
   207  		if ancestor == nil {
   208  			panic(fmt.Sprintf("sub-trie ancestor not found: %x", parent))
   209  		}
   210  		ancestor.deps++
   211  		req.parent = ancestor
   212  	}
   213  	s.scheduleNodeRequest(req)
   214  }
   215  
   216  // AddCodeEntry schedules the direct retrieval of a contract code that should not
   217  // be interpreted as a trie node, but rather accepted and stored into the database
   218  // as is.
   219  func (s *Sync) AddCodeEntry(hash common.Hash, path []byte, parent common.Hash, parentPath []byte) {
   220  	// Short circuit if the entry is empty or already known
   221  	if hash == types.EmptyCodeHash {
   222  		return
   223  	}
   224  	if s.membatch.hasCode(hash) {
   225  		return
   226  	}
   227  	// If database says duplicate, the blob is present for sure.
   228  	// Note we only check the existence with new code scheme, snap
   229  	// sync is expected to run with a fresh new node. Even there
   230  	// exists the code with legacy format, fetch and store with
   231  	// new scheme anyway.
   232  	if rawdb.HasCodeWithPrefix(s.database, hash) {
   233  		return
   234  	}
   235  	// Assemble the new sub-trie sync request
   236  	req := &codeRequest{
   237  		path: path,
   238  		hash: hash,
   239  	}
   240  	// If this sub-trie has a designated parent, link them together
   241  	if parent != (common.Hash{}) {
   242  		ancestor := s.nodeReqs[string(parentPath)] // the parent of codereq can ONLY be nodereq
   243  		if ancestor == nil {
   244  			panic(fmt.Sprintf("raw-entry ancestor not found: %x", parent))
   245  		}
   246  		ancestor.deps++
   247  		req.parents = append(req.parents, ancestor)
   248  	}
   249  	s.scheduleCodeRequest(req)
   250  }
   251  
   252  // Missing retrieves the known missing nodes from the trie for retrieval. To aid
   253  // both zond/6x style fast sync and snap/1x style state sync, the paths of trie
   254  // nodes are returned too, as well as separate hash list for codes.
   255  func (s *Sync) Missing(max int) ([]string, []common.Hash, []common.Hash) {
   256  	var (
   257  		nodePaths  []string
   258  		nodeHashes []common.Hash
   259  		codeHashes []common.Hash
   260  	)
   261  	for !s.queue.Empty() && (max == 0 || len(nodeHashes)+len(codeHashes) < max) {
   262  		// Retrieve the next item in line
   263  		item, prio := s.queue.Peek()
   264  
   265  		// If we have too many already-pending tasks for this depth, throttle
   266  		depth := int(prio >> 56)
   267  		if s.fetches[depth] > maxFetchesPerDepth {
   268  			break
   269  		}
   270  		// Item is allowed to be scheduled, add it to the task list
   271  		s.queue.Pop()
   272  		s.fetches[depth]++
   273  
   274  		switch item := item.(type) {
   275  		case common.Hash:
   276  			codeHashes = append(codeHashes, item)
   277  		case string:
   278  			req, ok := s.nodeReqs[item]
   279  			if !ok {
   280  				log.Error("Missing node request", "path", item)
   281  				continue // System very wrong, shouldn't happen
   282  			}
   283  			nodePaths = append(nodePaths, item)
   284  			nodeHashes = append(nodeHashes, req.hash)
   285  		}
   286  	}
   287  	return nodePaths, nodeHashes, codeHashes
   288  }
   289  
   290  // ProcessCode injects the received data for requested item. Note it can
   291  // happpen that the single response commits two pending requests(e.g.
   292  // there are two requests one for code and one for node but the hash
   293  // is same). In this case the second response for the same hash will
   294  // be treated as "non-requested" item or "already-processed" item but
   295  // there is no downside.
   296  func (s *Sync) ProcessCode(result CodeSyncResult) error {
   297  	// If the code was not requested or it's already processed, bail out
   298  	req := s.codeReqs[result.Hash]
   299  	if req == nil {
   300  		return ErrNotRequested
   301  	}
   302  	if req.data != nil {
   303  		return ErrAlreadyProcessed
   304  	}
   305  	req.data = result.Data
   306  	return s.commitCodeRequest(req)
   307  }
   308  
   309  // ProcessNode injects the received data for requested item. Note it can
   310  // happen that the single response commits two pending requests(e.g.
   311  // there are two requests one for code and one for node but the hash
   312  // is same). In this case the second response for the same hash will
   313  // be treated as "non-requested" item or "already-processed" item but
   314  // there is no downside.
   315  func (s *Sync) ProcessNode(result NodeSyncResult) error {
   316  	// If the trie node was not requested or it's already processed, bail out
   317  	req := s.nodeReqs[result.Path]
   318  	if req == nil {
   319  		return ErrNotRequested
   320  	}
   321  	if req.data != nil {
   322  		return ErrAlreadyProcessed
   323  	}
   324  	// Decode the node data content and update the request
   325  	node, err := decodeNode(req.hash.Bytes(), result.Data)
   326  	if err != nil {
   327  		return err
   328  	}
   329  	req.data = result.Data
   330  
   331  	// Create and schedule a request for all the children nodes
   332  	requests, err := s.children(req, node)
   333  	if err != nil {
   334  		return err
   335  	}
   336  	if len(requests) == 0 && req.deps == 0 {
   337  		s.commitNodeRequest(req)
   338  	} else {
   339  		req.deps += len(requests)
   340  		for _, child := range requests {
   341  			s.scheduleNodeRequest(child)
   342  		}
   343  	}
   344  	return nil
   345  }
   346  
   347  // Commit flushes the data stored in the internal membatch out to persistent
   348  // storage, returning any occurred error.
   349  func (s *Sync) Commit(dbw zonddb.Batch) error {
   350  	// Dump the membatch into a database dbw
   351  	for path, value := range s.membatch.nodes {
   352  		owner, inner := ResolvePath([]byte(path))
   353  		rawdb.WriteTrieNode(dbw, owner, inner, s.membatch.hashes[path], value, s.scheme)
   354  	}
   355  	for hash, value := range s.membatch.codes {
   356  		rawdb.WriteCode(dbw, hash, value)
   357  	}
   358  	// Drop the membatch data and return
   359  	s.membatch = newSyncMemBatch()
   360  	return nil
   361  }
   362  
   363  // MemSize returns an estimated size (in bytes) of the data held in the membatch.
   364  func (s *Sync) MemSize() uint64 {
   365  	return s.membatch.size
   366  }
   367  
   368  // Pending returns the number of state entries currently pending for download.
   369  func (s *Sync) Pending() int {
   370  	return len(s.nodeReqs) + len(s.codeReqs)
   371  }
   372  
   373  // schedule inserts a new state retrieval request into the fetch queue. If there
   374  // is already a pending request for this node, the new request will be discarded
   375  // and only a parent reference added to the old one.
   376  func (s *Sync) scheduleNodeRequest(req *nodeRequest) {
   377  	s.nodeReqs[string(req.path)] = req
   378  
   379  	// Schedule the request for future retrieval. This queue is shared
   380  	// by both node requests and code requests.
   381  	prio := int64(len(req.path)) << 56 // depth >= 128 will never happen, storage leaves will be included in their parents
   382  	for i := 0; i < 14 && i < len(req.path); i++ {
   383  		prio |= int64(15-req.path[i]) << (52 - i*4) // 15-nibble => lexicographic order
   384  	}
   385  	s.queue.Push(string(req.path), prio)
   386  }
   387  
   388  // schedule inserts a new state retrieval request into the fetch queue. If there
   389  // is already a pending request for this node, the new request will be discarded
   390  // and only a parent reference added to the old one.
   391  func (s *Sync) scheduleCodeRequest(req *codeRequest) {
   392  	// If we're already requesting this node, add a new reference and stop
   393  	if old, ok := s.codeReqs[req.hash]; ok {
   394  		old.parents = append(old.parents, req.parents...)
   395  		return
   396  	}
   397  	s.codeReqs[req.hash] = req
   398  
   399  	// Schedule the request for future retrieval. This queue is shared
   400  	// by both node requests and code requests.
   401  	prio := int64(len(req.path)) << 56 // depth >= 128 will never happen, storage leaves will be included in their parents
   402  	for i := 0; i < 14 && i < len(req.path); i++ {
   403  		prio |= int64(15-req.path[i]) << (52 - i*4) // 15-nibble => lexicographic order
   404  	}
   405  	s.queue.Push(req.hash, prio)
   406  }
   407  
   408  // children retrieves all the missing children of a state trie entry for future
   409  // retrieval scheduling.
   410  func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) {
   411  	// Gather all the children of the node, irrelevant whether known or not
   412  	type childNode struct {
   413  		path []byte
   414  		node node
   415  	}
   416  	var children []childNode
   417  
   418  	switch node := (object).(type) {
   419  	case *shortNode:
   420  		key := node.Key
   421  		if hasTerm(key) {
   422  			key = key[:len(key)-1]
   423  		}
   424  		children = []childNode{{
   425  			node: node.Val,
   426  			path: append(append([]byte(nil), req.path...), key...),
   427  		}}
   428  	case *fullNode:
   429  		for i := 0; i < 17; i++ {
   430  			if node.Children[i] != nil {
   431  				children = append(children, childNode{
   432  					node: node.Children[i],
   433  					path: append(append([]byte(nil), req.path...), byte(i)),
   434  				})
   435  			}
   436  		}
   437  	default:
   438  		panic(fmt.Sprintf("unknown node: %+v", node))
   439  	}
   440  	// Iterate over the children, and request all unknown ones
   441  	var (
   442  		missing = make(chan *nodeRequest, len(children))
   443  		pending sync.WaitGroup
   444  	)
   445  	for _, child := range children {
   446  		// Notify any external watcher of a new key/value node
   447  		if req.callback != nil {
   448  			if node, ok := (child.node).(valueNode); ok {
   449  				var paths [][]byte
   450  				if len(child.path) == 2*common.HashLength {
   451  					paths = append(paths, hexToKeybytes(child.path))
   452  				} else if len(child.path) == 4*common.HashLength {
   453  					paths = append(paths, hexToKeybytes(child.path[:2*common.HashLength]))
   454  					paths = append(paths, hexToKeybytes(child.path[2*common.HashLength:]))
   455  				}
   456  				if err := req.callback(paths, child.path, node, req.hash, req.path); err != nil {
   457  					return nil, err
   458  				}
   459  			}
   460  		}
   461  		// If the child references another node, resolve or schedule
   462  		if node, ok := (child.node).(hashNode); ok {
   463  			// Try to resolve the node from the local database
   464  			if s.membatch.hasNode(child.path) {
   465  				continue
   466  			}
   467  			// Check the presence of children concurrently
   468  			pending.Add(1)
   469  			go func(child childNode) {
   470  				defer pending.Done()
   471  
   472  				// If database says duplicate, then at least the trie node is present
   473  				// and we hold the assumption that it's NOT legacy contract code.
   474  				var (
   475  					chash        = common.BytesToHash(node)
   476  					owner, inner = ResolvePath(child.path)
   477  				)
   478  				if rawdb.HasTrieNode(s.database, owner, inner, chash, s.scheme) {
   479  					return
   480  				}
   481  				// Locally unknown node, schedule for retrieval
   482  				missing <- &nodeRequest{
   483  					path:     child.path,
   484  					hash:     chash,
   485  					parent:   req,
   486  					callback: req.callback,
   487  				}
   488  			}(child)
   489  		}
   490  	}
   491  	pending.Wait()
   492  
   493  	requests := make([]*nodeRequest, 0, len(children))
   494  	for done := false; !done; {
   495  		select {
   496  		case miss := <-missing:
   497  			requests = append(requests, miss)
   498  		default:
   499  			done = true
   500  		}
   501  	}
   502  	return requests, nil
   503  }
   504  
   505  // commit finalizes a retrieval request and stores it into the membatch. If any
   506  // of the referencing parent requests complete due to this commit, they are also
   507  // committed themselves.
   508  func (s *Sync) commitNodeRequest(req *nodeRequest) error {
   509  	// Write the node content to the membatch
   510  	s.membatch.nodes[string(req.path)] = req.data
   511  	s.membatch.hashes[string(req.path)] = req.hash
   512  	// The size tracking refers to the db-batch, not the in-memory data.
   513  	// Therefore, we ignore the req.path, and account only for the hash+data
   514  	// which eventually is written to db.
   515  	s.membatch.size += common.HashLength + uint64(len(req.data))
   516  	delete(s.nodeReqs, string(req.path))
   517  	s.fetches[len(req.path)]--
   518  
   519  	// Check parent for completion
   520  	if req.parent != nil {
   521  		req.parent.deps--
   522  		if req.parent.deps == 0 {
   523  			if err := s.commitNodeRequest(req.parent); err != nil {
   524  				return err
   525  			}
   526  		}
   527  	}
   528  	return nil
   529  }
   530  
   531  // commit finalizes a retrieval request and stores it into the membatch. If any
   532  // of the referencing parent requests complete due to this commit, they are also
   533  // committed themselves.
   534  func (s *Sync) commitCodeRequest(req *codeRequest) error {
   535  	// Write the node content to the membatch
   536  	s.membatch.codes[req.hash] = req.data
   537  	s.membatch.size += common.HashLength + uint64(len(req.data))
   538  	delete(s.codeReqs, req.hash)
   539  	s.fetches[len(req.path)]--
   540  
   541  	// Check all parents for completion
   542  	for _, parent := range req.parents {
   543  		parent.deps--
   544  		if parent.deps == 0 {
   545  			if err := s.commitNodeRequest(parent); err != nil {
   546  				return err
   547  			}
   548  		}
   549  	}
   550  	return nil
   551  }
   552  
   553  // ResolvePath resolves the provided composite node path by separating the
   554  // path in account trie if it's existent.
   555  func ResolvePath(path []byte) (common.Hash, []byte) {
   556  	var owner common.Hash
   557  	if len(path) >= 2*common.HashLength {
   558  		owner = common.BytesToHash(hexToKeybytes(path[:2*common.HashLength]))
   559  		path = path[2*common.HashLength:]
   560  	}
   561  	return owner, path
   562  }