github.com/klaytn/klaytn@v1.12.1/storage/statedb/sync.go

     1  // Modifications Copyright 2018 The klaytn Authors
     2  // Copyright 2015 The go-ethereum Authors
     3  // This file is part of the go-ethereum library.
     4  //
     5  // The go-ethereum library is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Lesser General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // The go-ethereum library is distributed in the hope that it will be useful,
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    13  // GNU Lesser General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Lesser General Public License
    16  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    17  //
    18  // This file is derived from trie/sync.go (2018/06/04).
    19  // Modified and improved for the klaytn development.
    20  
    21  package statedb
    22  
    23  import (
    24  	"errors"
    25  	"fmt"
    26  	"strconv"
    27  
    28  	lru "github.com/hashicorp/golang-lru"
    29  	"github.com/klaytn/klaytn/common"
    30  	"github.com/klaytn/klaytn/common/prque"
    31  	"github.com/klaytn/klaytn/storage/database"
    32  )
    33  
    34  // ErrNotRequested is returned by the trie sync when it's requested to process a
    35  // node it did not request.
    36  var ErrNotRequested = errors.New("not requested")
    37  
    38  // ErrAlreadyProcessed is returned by the trie sync when it's requested to process a
    39  // node it already processed previously.
    40  var ErrAlreadyProcessed = errors.New("already processed")
    41  
    42  // maxFetchesPerDepth is the maximum number of pending trie nodes per depth. The
    43  // role of this value is to limit the number of trie nodes that get expanded in
    44  // memory if the node was configured with a significant number of peers.
    45  const maxFetchesPerDepth = 16384
    46  
    47  // request represents a scheduled or already in-flight state retrieval request.
    48  type request struct {
    49  	path []byte      // Merkle path leading to this node for prioritization
    50  	hash common.Hash // Hash of the node data content to retrieve
    51  	data []byte      // Data content of the node, cached until all subtrees complete
    52  	code bool        // Whether this is a code entry
    53  
    54  	parents []*request // Parent state nodes referencing this entry (notify all upon completion)
     55  	depth   int        // Depth level within the trie where the node is located, used to prioritise DFS
    56  	deps    int        // Number of dependencies before allowed to commit this node
    57  
     58  	callback LeafCallback // Callback to invoke if a leaf node is reached on this branch
    59  }
    60  
    61  // SyncPath is a path tuple identifying a particular trie node either in a single
    62  // trie (account) or a layered trie (account -> storage).
    63  //
    64  // Content wise the tuple either has 1 element if it addresses a node in a single
    65  // trie or 2 elements if it addresses a node in a stacked trie.
    66  //
     67  // To support addressing arbitrary trie nodes, the path needs to support odd nibble
    68  // lengths. To avoid transferring expanded hex form over the network, the last
    69  // part of the tuple (which needs to index into the middle of a trie) is compact
    70  // encoded. In case of a 2-tuple, the first item is always 32 bytes so that is
    71  // simple binary encoded.
    72  //
    73  // Examples:
    74  //   - Path 0x9  -> {0x19}
    75  //   - Path 0x99 -> {0x0099}
    76  //   - Path 0x01234567890123456789012345678901012345678901234567890123456789019  -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x19}
    77  //   - Path 0x012345678901234567890123456789010123456789012345678901234567890199 -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x0099}
    78  type SyncPath [][]byte
    79  
    80  // newSyncPath converts an expanded trie path from nibble form into a compact
    81  // version that can be sent over the network.
    82  func newSyncPath(path []byte) SyncPath {
     83  	// If the hash is from the account trie, append a single item; if it
     84  	// is from a storage trie, append a tuple. Note, the length 64 is
    85  	// clashing between account leaf and storage root. It's fine though
    86  	// because having a trie node at 64 depth means a hash collision was
    87  	// found and we're long dead.
    88  	if len(path) < 64 {
    89  		return SyncPath{hexToCompact(path)}
    90  	}
    91  	return SyncPath{hexToKeybytes(path[:64]), hexToCompact(path[64:])}
    92  }
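
         // Illustrative only (not part of the original file), restating the examples
         // above in terms of this function:
         //
         //	newSyncPath([]byte{0x9})      // -> SyncPath{{0x19}}       (odd length, single compact element)
         //	newSyncPath([]byte{0x9, 0x9}) // -> SyncPath{{0x00, 0x99}} (even length, compact prefix byte 0x00)
         //
         // Paths of 64 nibbles or more are split into the 32 keybytes of the account
         // part plus the compact-encoded remainder addressing the storage trie.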
    93  
     94  // SyncResult is a response with the requested data along with its hash.
    95  type SyncResult struct {
    96  	Hash common.Hash // Hash of the originally unknown trie node
    97  	Data []byte      // Data content of the retrieved node
    98  	Err  error
    99  }
   100  
   101  // syncMemBatch is an in-memory buffer of successfully downloaded but not yet
   102  // persisted data items.
   103  type syncMemBatch struct {
   104  	nodes map[common.Hash][]byte // In-memory membatch of recently completed nodes
   105  	codes map[common.Hash][]byte // In-memory membatch of recently completed codes
   106  }
   107  
   108  // newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes.
   109  func newSyncMemBatch() *syncMemBatch {
   110  	return &syncMemBatch{
   111  		nodes: make(map[common.Hash][]byte),
   112  		codes: make(map[common.Hash][]byte),
   113  	}
   114  }
   115  
    116  // hasNode reports whether the trie node with the given hash is already cached.
   117  func (batch *syncMemBatch) hasNode(hash common.Hash) bool {
   118  	_, ok := batch.nodes[hash]
   119  	return ok
   120  }
   121  
    122  // hasCode reports whether the contract code with the given hash is already cached.
   123  func (batch *syncMemBatch) hasCode(hash common.Hash) bool {
   124  	_, ok := batch.codes[hash]
   125  	return ok
   126  }
   127  
   128  type StateTrieReadDB interface {
   129  	ReadTrieNode(hash common.ExtHash) ([]byte, error)
   130  	HasTrieNode(hash common.ExtHash) (bool, error)
   131  	HasCodeWithPrefix(hash common.Hash) bool
   132  }
   133  
   134  // TrieSync is the main state trie synchronisation scheduler, which provides yet
   135  // unknown trie hashes to retrieve, accepts node data associated with said hashes
   136  // and reconstructs the trie step by step until all is done.
   137  type TrieSync struct {
   138  	database         StateTrieReadDB          // Persistent database to check for existing entries
   139  	membatch         *syncMemBatch            // Memory buffer to avoid frequent database writes
   140  	nodeReqs         map[common.Hash]*request // Pending requests pertaining to a trie node hash
   141  	codeReqs         map[common.Hash]*request // Pending requests pertaining to a code hash
   142  	queue            *prque.Prque             // Priority queue with the pending requests
   143  	fetches          map[int]int              // Number of active fetches per trie node depth
    144  	retrievedByDepth map[int]int              // Number of retrieved trie nodes, counted by depth
    145  	committedByDepth map[int]int              // Number of committed trie nodes, counted by depth
   146  	bloom            *SyncBloom               // Bloom filter for fast state existence checks
   147  	exist            *lru.Cache               // exist to check if the trie node is already written or not
   148  }
   149  
   150  // NewTrieSync creates a new trie data download scheduler.
   151  // If both bloom and cache are set, only cache is used.
   152  func NewTrieSync(root common.Hash, database StateTrieReadDB, callback LeafCallback, bloom *SyncBloom, lruCache *lru.Cache) *TrieSync {
   153  	ts := &TrieSync{
   154  		database:         database,
   155  		membatch:         newSyncMemBatch(),
   156  		nodeReqs:         make(map[common.Hash]*request),
   157  		codeReqs:         make(map[common.Hash]*request),
   158  		queue:            prque.New(),
   159  		fetches:          make(map[int]int),
   160  		retrievedByDepth: make(map[int]int),
   161  		committedByDepth: make(map[int]int),
   162  		bloom:            bloom,
   163  		exist:            lruCache,
   164  	}
   165  	ts.AddSubTrie(root, nil, 0, common.Hash{}, callback)
   166  	return ts
   167  }
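
         // Illustrative usage sketch (not part of the original file). The scheduler is
         // driven by repeatedly asking for missing hashes, injecting the fetched data and
         // flushing the membatch. retrieveData is a hypothetical network helper, and batch
         // is assumed to be a database.Batch obtained from the node's persistent database.
         //
         //	sched := NewTrieSync(root, db, nil, nil, nil)
         //	for sched.Pending() > 0 {
         //		nodes, _, codes := sched.Missing(128)
         //		for _, result := range retrieveData(append(nodes, codes...)) { // hypothetical fetch
         //			if err := sched.Process(result); err != nil {
         //				// ErrNotRequested / ErrAlreadyProcessed only flag duplicate hashes here
         //			}
         //		}
         //		if _, err := sched.Commit(batch); err != nil {
         //			return err
         //		}
         //	}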
   168  
   169  // AddSubTrie registers a new trie to the sync code, rooted at the designated parent.
   170  func (s *TrieSync) AddSubTrie(root common.Hash, path []byte, depth int, parent common.Hash, callback LeafCallback) {
   171  	// Short circuit if the trie is empty or already known
   172  	if root == emptyRoot {
   173  		return
   174  	}
   175  	if s.membatch.hasNode(root) {
   176  		return
   177  	}
   178  	if s.exist != nil {
   179  		if _, ok := s.exist.Get(root); ok {
   180  			// already written in migration, skip the node
   181  			return
   182  		}
   183  	} else if s.bloom == nil || s.bloom.Contains(root[:]) {
   184  		// Bloom filter says this might be a duplicate, double check.
   185  		// If database says yes, then at least the trie node is present
   186  		// and we hold the assumption that it's NOT legacy contract code.
   187  		if ok, _ := s.database.HasTrieNode(root.ExtendZero()); ok {
   188  			logger.Debug("skip write sub-trie", "root", root.String())
   189  			return
   190  		}
   191  		// False positive, bump fault meter
   192  		bloomFaultMeter.Mark(1)
   193  	}
   194  	// Assemble the new sub-trie sync request
   195  	req := &request{
   196  		path:     path,
   197  		hash:     root,
   198  		depth:    depth,
   199  		callback: callback,
   200  	}
   201  	// If this sub-trie has a designated parent, link them together
   202  	if parent != (common.Hash{}) {
   203  		ancestor := s.nodeReqs[parent]
   204  		if ancestor == nil {
   205  			panic(fmt.Sprintf("sub-trie ancestor not found: %x", parent))
   206  		}
   207  		ancestor.deps++
   208  		req.parents = append(req.parents, ancestor)
   209  	}
   210  	s.schedule(req)
   211  }
   212  
   213  // AddCodeEntry schedules the direct retrieval of a contract code that should not
   214  // be interpreted as a trie node, but rather accepted and stored into the database
   215  // as is.
   216  func (s *TrieSync) AddCodeEntry(hash common.Hash, path []byte, depth int, parent common.Hash) {
   217  	// Short circuit if the entry is empty or already known
   218  	if hash == emptyState {
   219  		return
   220  	}
   221  	if s.membatch.hasCode(hash) {
   222  		return
   223  	}
   224  	if s.exist != nil {
   225  		if _, ok := s.exist.Get(hash); ok {
   226  			// already written in migration, skip the node
   227  			return
   228  		}
   229  	} else if s.bloom == nil || s.bloom.Contains(hash[:]) {
   230  		// Bloom filter says this might be a duplicate, double check.
   231  		// If database says yes, the blob is present for sure.
    232  		// Note we only check existence under the new code scheme, since fast
    233  		// sync is expected to run on a fresh node. Even if the code
    234  		// exists in the legacy format, fetch and store it with the new
    235  		// scheme anyway.
   236  		if ok := s.database.HasCodeWithPrefix(hash); ok {
   237  			logger.Debug("skip write code entry", "root", hash.String())
   238  			return
   239  		}
   240  		// False positive, bump fault meter
   241  		bloomFaultMeter.Mark(1)
   242  	}
    243  	// Assemble the new code entry sync request
   244  	req := &request{
   245  		path:  path,
   246  		hash:  hash,
   247  		code:  true,
   248  		depth: depth,
   249  	}
   250  	// If this sub-trie has a designated parent, link them together
   251  	if parent != (common.Hash{}) {
   252  		ancestor := s.nodeReqs[parent] // the parent of codereq can ONLY be nodereq
   253  		if ancestor == nil {
   254  			panic(fmt.Sprintf("raw-entry ancestor not found: %x", parent))
   255  		}
   256  		ancestor.deps++
   257  		req.parents = append(req.parents, ancestor)
   258  	}
   259  	s.schedule(req)
   260  }
   261  
   262  // Missing retrieves the known missing nodes from the trie for retrieval. To aid
   263  // both klay/6x style fast sync and snap/1x style state sync, the paths of trie
    264  // nodes are returned too, as well as a separate hash list for codes.
   265  func (s *TrieSync) Missing(max int) (nodes []common.Hash, paths []SyncPath, codes []common.Hash) {
   266  	var (
   267  		nodeHashes []common.Hash
   268  		nodePaths  []SyncPath
   269  		codeHashes []common.Hash
   270  	)
   271  	for !s.queue.Empty() && (max == 0 || len(nodeHashes)+len(codeHashes) < max) {
    272  		// Retrieve the next item in line
   273  		item, prio := s.queue.Peek()
   274  
   275  		// If we have too many already-pending tasks for this depth, throttle
   276  		depth := int(prio >> 56)
   277  		if s.fetches[depth] > maxFetchesPerDepth {
   278  			break
   279  		}
   280  		// Item is allowed to be scheduled, add it to the task list
   281  		s.queue.Pop()
   282  		s.fetches[depth]++
   283  
   284  		hash := item.(common.Hash)
   285  		if req, ok := s.nodeReqs[hash]; ok {
   286  			nodeHashes = append(nodeHashes, hash)
   287  			nodePaths = append(nodePaths, newSyncPath(req.path))
   288  		} else {
   289  			codeHashes = append(codeHashes, hash)
   290  		}
   291  	}
   292  	return nodeHashes, nodePaths, codeHashes
   293  }
   294  
    295  // Process injects the received data for a requested item. Note it can
    296  // happen that a single response commits two pending requests (e.g.
    297  // there are two requests, one for code and one for a node, but the hash
    298  // is the same). In this case the second response for the same hash will
    299  // be treated as a "not-requested" item or an "already-processed" item,
    300  // but there is no downside.
   301  func (s *TrieSync) Process(result SyncResult) error {
   302  	// If the item was not requested either for code or node, bail out
   303  	if s.nodeReqs[result.Hash] == nil && s.codeReqs[result.Hash] == nil {
   304  		return ErrNotRequested
   305  	}
    306  	// There is a pending code request for this data, commit directly
   307  	var filled bool
   308  	if req := s.codeReqs[result.Hash]; req != nil && req.data == nil {
   309  		filled = true
   310  		req.data = result.Data
   311  		s.commit(req)
   312  	}
    313  	// There is a pending node request for this data, fill it.
   314  	if req := s.nodeReqs[result.Hash]; req != nil && req.data == nil {
   315  		filled = true
   316  		// Decode the node data content and update the request
   317  		node, err := decodeNode(result.Hash[:], result.Data)
   318  		if err != nil {
   319  			return err
   320  		}
   321  		req.data = result.Data
   322  
   323  		// Create and schedule a request for all the children nodes
   324  		requests, err := s.children(req, node)
   325  		if err != nil {
   326  			return err
   327  		}
   328  		if len(requests) == 0 && req.deps == 0 {
   329  			s.commit(req)
   330  		} else {
   331  			req.deps += len(requests)
   332  			for _, child := range requests {
   333  				s.schedule(child)
   334  			}
   335  		}
   336  	}
   337  	if !filled {
   338  		return ErrAlreadyProcessed
   339  	}
   340  	return nil
   341  }
   342  
   343  // Commit flushes the data stored in the internal membatch out to persistent
    344  // storage, returning the number of items written and any error that occurred.
   345  func (s *TrieSync) Commit(dbw database.Batch) (int, error) {
   346  	written := 0
   347  	// Dump the membatch into a database dbw
   348  	for key, value := range s.membatch.nodes {
   349  		if err := dbw.Put(database.TrieNodeKey(key.ExtendZero()), value); err != nil { // only works with hash32
   350  			return written, err
   351  		}
   352  		if s.bloom != nil {
   353  			s.bloom.Add(key[:])
   354  		}
   355  		if s.exist != nil {
   356  			s.exist.Add(key, nil)
   357  		}
   358  		written += 1
   359  	}
   360  	for key, value := range s.membatch.codes {
   361  		if err := dbw.Put(database.CodeKey(key), value); err != nil {
   362  			return written, err
   363  		}
   364  		if s.bloom != nil {
   365  			s.bloom.Add(key[:])
   366  		}
   367  		if s.exist != nil {
   368  			s.exist.Add(key, nil)
   369  		}
   370  		written += 1
   371  	}
   372  
   373  	// Drop the membatch data and return
   374  	s.membatch = newSyncMemBatch()
   375  	return written, nil
   376  }
   377  
   378  // Pending returns the number of state entries currently pending for download.
   379  func (s *TrieSync) Pending() int {
   380  	return len(s.nodeReqs) + len(s.codeReqs)
   381  }
   382  
   383  // schedule inserts a new state retrieval request into the fetch queue. If there
   384  // is already a pending request for this node, the new request will be discarded
   385  // and only a parent reference added to the old one.
   386  func (s *TrieSync) schedule(req *request) {
   387  	reqset := s.nodeReqs
   388  	if req.code {
   389  		reqset = s.codeReqs
   390  	}
   391  	// If we're already requesting this node, add a new reference and stop
   392  	if old, ok := reqset[req.hash]; ok {
   393  		old.parents = append(old.parents, req.parents...)
   394  		return
   395  	}
   396  
    397  	// Count the retrieved trie node by depth
   398  	s.retrievedByDepth[req.depth]++
   399  
   400  	reqset[req.hash] = req
   401  
    402  	// Schedule the request for future retrieval. This queue is shared
    403  	// by both node requests and code requests. It can happen that a trie
    404  	// node and contract code have the same hash. In this case two elements
    405  	// with the same hash and the same or a different depth will be pushed.
    406  	// That is fine: in the worst case the second response is treated as a duplicate.
   407  	prio := int64(len(req.path)) << 56 // depth >= 128 will never happen, storage leaves will be included in their parents
   408  	for i := 0; i < 14 && i < len(req.path); i++ {
   409  		prio |= int64(15-req.path[i]) << (52 - i*4) // 15-nibble => lexicographic order
   410  	}
   411  	s.queue.Push(req.hash, prio)
   412  }
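
         // Worked example of the encoding above (illustrative, not part of the original
         // file): for a request at path {0x1, 0xf},
         //
         //	prio = 2<<56 | (15-0x1)<<52 | (15-0xf)<<48
         //
         // The top byte holds the depth (len(req.path) == 2), so deeper nodes carry a
         // higher priority and pop first (favouring DFS), while the inverted nibbles make
         // siblings at the same depth pop in ascending lexicographic path order.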
   413  
   414  // children retrieves all the missing children of a state trie entry for future
   415  // retrieval scheduling.
   416  func (s *TrieSync) children(req *request, object node) ([]*request, error) {
    417  	// Gather all the children of the node, regardless of whether they are known or not
   418  	type child struct {
   419  		path  []byte
   420  		node  node
   421  		depth int
   422  	}
   423  	children := []child{}
   424  
   425  	switch node := (object).(type) {
   426  	case *shortNode:
   427  		key := node.Key
   428  		if hasTerm(key) {
   429  			key = key[:len(key)-1]
   430  		}
   431  		children = []child{{
   432  			node:  node.Val,
   433  			path:  append(append([]byte(nil), req.path...), key...),
   434  			depth: req.depth + len(node.Key),
   435  		}}
   436  	case *fullNode:
   437  		for i := 0; i < 17; i++ {
   438  			if node.Children[i] != nil {
   439  				children = append(children, child{
   440  					node:  node.Children[i],
   441  					path:  append(append([]byte(nil), req.path...), byte(i)),
   442  					depth: req.depth + 1,
   443  				})
   444  			}
   445  		}
   446  	default:
   447  		panic(fmt.Sprintf("unknown node: %+v", node))
   448  	}
   449  	// Iterate over the children, and request all unknown ones
   450  	requests := make([]*request, 0, len(children))
   451  	for _, child := range children {
   452  		// Notify any external watcher of a new key/value node
   453  		if req.callback != nil {
   454  			if node, ok := (child.node).(valueNode); ok {
   455  				var paths [][]byte
   456  				if len(child.path) == 2*common.HashLength {
   457  					paths = append(paths, hexToKeybytes(child.path))
   458  				} else if len(child.path) == 4*common.HashLength {
   459  					paths = append(paths, hexToKeybytes(child.path[:2*common.HashLength]))
   460  					paths = append(paths, hexToKeybytes(child.path[2*common.HashLength:]))
   461  				}
   462  				if err := req.callback(paths, child.path, node, req.hash.ExtendZero(), child.depth); err != nil {
   463  					return nil, err
   464  				}
   465  			}
   466  		}
   467  		// If the child references another node, resolve or schedule
   468  		if node, ok := (child.node).(hashNode); ok {
   469  			// Try to resolve the node from the local database
   470  			hash := common.BytesToExtHash(node).Unextend()
   471  			if s.membatch.hasNode(hash) {
   472  				continue
   473  			}
   474  			if s.exist != nil {
   475  				if _, ok := s.exist.Get(hash); ok {
   476  					// already written in migration, skip the node
   477  					continue
   478  				}
   479  			} else if s.bloom == nil || s.bloom.Contains(node) {
   480  				// Bloom filter says this might be a duplicate, double check.
   481  				// If database says yes, then at least the trie node is present
   482  				// and we hold the assumption that it's NOT legacy contract code.
   483  				if ok, _ := s.database.HasTrieNode(hash.ExtendZero()); ok {
   484  					continue
   485  				}
   486  				// False positive, bump fault meter
   487  				bloomFaultMeter.Mark(1)
   488  			}
   489  
   490  			// Locally unknown node, schedule for retrieval
   491  			requests = append(requests, &request{
   492  				path:     child.path,
   493  				hash:     hash,
   494  				parents:  []*request{req},
   495  				depth:    child.depth,
   496  				callback: req.callback,
   497  			})
   498  		}
   499  	}
   500  	return requests, nil
   501  }
   502  
   503  // commit finalizes a retrieval request and stores it into the membatch. If any
   504  // of the referencing parent requests complete due to this commit, they are also
   505  // committed themselves.
   506  func (s *TrieSync) commit(req *request) (err error) {
    507  	// Count the committed trie node by depth
   508  	s.committedByDepth[req.depth]++
   509  
   510  	// Write the node content to the membatch
   511  	if req.code {
   512  		s.membatch.codes[req.hash] = req.data
   513  		delete(s.codeReqs, req.hash)
   514  		s.fetches[len(req.path)]--
   515  	} else {
   516  		s.membatch.nodes[req.hash] = req.data
   517  		delete(s.nodeReqs, req.hash)
   518  		s.fetches[len(req.path)]--
   519  	}
   520  	// Check all parents for completion
   521  	for _, parent := range req.parents {
   522  		parent.deps--
   523  		if parent.deps == 0 {
   524  			if err := s.commit(parent); err != nil {
   525  				return err
   526  			}
   527  		}
   528  	}
   529  	return nil
   530  }
   531  
    532  // RetrievedByDepth returns the retrieved trie node count for the given depth.
    533  // This number is the same as the number of nodes that need to be committed to complete the trie sync.
   534  func (s *TrieSync) RetrievedByDepth(depth int) int {
   535  	return s.retrievedByDepth[depth]
   536  }
   537  
   538  // CommittedByDepth returns the committed trie count by given depth.
    539  // CommittedByDepth returns the committed trie node count for the given depth.
   540  	return s.committedByDepth[depth]
   541  }
   542  
   543  // CalcProgressPercentage returns the progress percentage.
   544  func (s *TrieSync) CalcProgressPercentage() float64 {
   545  	var progress float64
    546  	// depth    max trie     resolution (%)
    547  	// 0        1            100.00000
    548  	// 1        16           6.25000
    549  	// 2        256          0.39063
    550  	// 3        4,096        0.02441
    551  	// 4        65,536       0.00153
    552  	// 5        1,048,576    0.00010
   553  
   554  	for i := 0; i < 20; i++ {
   555  		c, r := s.CommittedByDepth(i), s.RetrievedByDepth(i)
   556  
   557  		var progressByDepth float64
   558  
   559  		if r == 0 {
   560  			break
   561  		}
   562  
   563  		if r > 0 {
   564  			progressByDepth = float64(c) / float64(r) * 100
   565  			if progressByDepth > progress && i < 4 { // Scan depth 0 ~ 3 for accuracy
   566  				progress = progressByDepth
   567  			}
   568  		}
   569  
   570  		logger.Debug("Trie sync progress by depth #"+strconv.Itoa(i), "committed", c, "retrieved", r, "progress", progressByDepth)
   571  	}
   572  
   573  	logger.Debug("Trie sync progress ", "progress", strconv.FormatFloat(progress, 'f', -1, 64)+"%")
   574  
   575  	return progress
   576  }
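
         // Illustrative only (not part of the original file): if at depth 2 the scheduler
         // has retrieved 200 nodes and committed 50 of them so far, progressByDepth is
         // 50/200*100 = 25. Since only depths 0 to 3 feed the final figure, the returned
         // progress is the largest per-depth percentage among those shallow depths.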