github.com/daethereum/go-dae@v2.2.3+incompatible/trie/committer.go (about)

     1  // Copyright 2020 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package trie
    18  
    19  import (
    20  	"errors"
    21  	"fmt"
    22  	"sync"
    23  
    24  	"github.com/daethereum/go-dae/common"
    25  )
    26  
    27  // leafChanSize is the size of the leafCh. It's a pretty arbitrary number, to allow
    28  // some parallelism but not incur too much memory overhead.
    29  const leafChanSize = 200
    30  
    31  // leaf represents a trie leaf value
    32  type leaf struct {
    33  	size int         // size of the rlp data (estimate)
    34  	hash common.Hash // hash of rlp data
    35  	node node        // the node to commit
    36  	path []byte      // the path from the root node
    37  }
    38  
    39  // committer is a type used for the trie Commit operation. A committer has some
    40  // internal preallocated temp space, and also a callback that is invoked when
    41  // leaves are committed. The leafs are passed through the `leafCh`,  to allow
    42  // some level of parallelism.
    43  // By 'some level' of parallelism, it's still the case that all leaves will be
    44  // processed sequentially - onleaf will never be called in parallel or out of order.
    45  type committer struct {
    46  	onleaf LeafCallback
    47  	leafCh chan *leaf
    48  }
    49  
    50  // committers live in a global sync.Pool
    51  var committerPool = sync.Pool{
    52  	New: func() interface{} {
    53  		return &committer{}
    54  	},
    55  }
    56  
    57  // newCommitter creates a new committer or picks one from the pool.
    58  func newCommitter() *committer {
    59  	return committerPool.Get().(*committer)
    60  }
    61  
    62  func returnCommitterToPool(h *committer) {
    63  	h.onleaf = nil
    64  	h.leafCh = nil
    65  	committerPool.Put(h)
    66  }
    67  
    68  // Commit collapses a node down into a hash node and inserts it into the database
    69  func (c *committer) Commit(n node, db *Database) (hashNode, int, error) {
    70  	if db == nil {
    71  		return nil, 0, errors.New("no db provided")
    72  	}
    73  	h, committed, err := c.commit(nil, n, db)
    74  	if err != nil {
    75  		return nil, 0, err
    76  	}
    77  	return h.(hashNode), committed, nil
    78  }
    79  
    80  // commit collapses a node down into a hash node and inserts it into the database
    81  func (c *committer) commit(path []byte, n node, db *Database) (node, int, error) {
    82  	// if this path is clean, use available cached data
    83  	hash, dirty := n.cache()
    84  	if hash != nil && !dirty {
    85  		return hash, 0, nil
    86  	}
    87  	// Commit children, then parent, and remove the dirty flag.
    88  	switch cn := n.(type) {
    89  	case *shortNode:
    90  		// Commit child
    91  		collapsed := cn.copy()
    92  
    93  		// If the child is fullNode, recursively commit,
    94  		// otherwise it can only be hashNode or valueNode.
    95  		var childCommitted int
    96  		if _, ok := cn.Val.(*fullNode); ok {
    97  			childV, committed, err := c.commit(append(path, cn.Key...), cn.Val, db)
    98  			if err != nil {
    99  				return nil, 0, err
   100  			}
   101  			collapsed.Val, childCommitted = childV, committed
   102  		}
   103  		// The key needs to be copied, since we're delivering it to database
   104  		collapsed.Key = hexToCompact(cn.Key)
   105  		hashedNode := c.store(path, collapsed, db)
   106  		if hn, ok := hashedNode.(hashNode); ok {
   107  			return hn, childCommitted + 1, nil
   108  		}
   109  		return collapsed, childCommitted, nil
   110  	case *fullNode:
   111  		hashedKids, childCommitted, err := c.commitChildren(path, cn, db)
   112  		if err != nil {
   113  			return nil, 0, err
   114  		}
   115  		collapsed := cn.copy()
   116  		collapsed.Children = hashedKids
   117  
   118  		hashedNode := c.store(path, collapsed, db)
   119  		if hn, ok := hashedNode.(hashNode); ok {
   120  			return hn, childCommitted + 1, nil
   121  		}
   122  		return collapsed, childCommitted, nil
   123  	case hashNode:
   124  		return cn, 0, nil
   125  	default:
   126  		// nil, valuenode shouldn't be committed
   127  		panic(fmt.Sprintf("%T: invalid node: %v", n, n))
   128  	}
   129  }
   130  
   131  // commitChildren commits the children of the given fullnode
   132  func (c *committer) commitChildren(path []byte, n *fullNode, db *Database) ([17]node, int, error) {
   133  	var (
   134  		committed int
   135  		children  [17]node
   136  	)
   137  	for i := 0; i < 16; i++ {
   138  		child := n.Children[i]
   139  		if child == nil {
   140  			continue
   141  		}
   142  		// If it's the hashed child, save the hash value directly.
   143  		// Note: it's impossible that the child in range [0, 15]
   144  		// is a valueNode.
   145  		if hn, ok := child.(hashNode); ok {
   146  			children[i] = hn
   147  			continue
   148  		}
   149  		// Commit the child recursively and store the "hashed" value.
   150  		// Note the returned node can be some embedded nodes, so it's
   151  		// possible the type is not hashNode.
   152  		hashed, childCommitted, err := c.commit(append(path, byte(i)), child, db)
   153  		if err != nil {
   154  			return children, 0, err
   155  		}
   156  		children[i] = hashed
   157  		committed += childCommitted
   158  	}
   159  	// For the 17th child, it's possible the type is valuenode.
   160  	if n.Children[16] != nil {
   161  		children[16] = n.Children[16]
   162  	}
   163  	return children, committed, nil
   164  }
   165  
   166  // store hashes the node n and if we have a storage layer specified, it writes
   167  // the key/value pair to it and tracks any node->child references as well as any
   168  // node->external trie references.
   169  func (c *committer) store(path []byte, n node, db *Database) node {
   170  	// Larger nodes are replaced by their hash and stored in the database.
   171  	var (
   172  		hash, _ = n.cache()
   173  		size    int
   174  	)
   175  	if hash == nil {
   176  		// This was not generated - must be a small node stored in the parent.
   177  		// In theory, we should apply the leafCall here if it's not nil(embedded
   178  		// node usually contains value). But small value(less than 32bytes) is
   179  		// not our target.
   180  		return n
   181  	} else {
   182  		// We have the hash already, estimate the RLP encoding-size of the node.
   183  		// The size is used for mem tracking, does not need to be exact
   184  		size = estimateSize(n)
   185  	}
   186  	// If we're using channel-based leaf-reporting, send to channel.
   187  	// The leaf channel will be active only when there an active leaf-callback
   188  	if c.leafCh != nil {
   189  		c.leafCh <- &leaf{
   190  			size: size,
   191  			hash: common.BytesToHash(hash),
   192  			node: n,
   193  			path: path,
   194  		}
   195  	} else if db != nil {
   196  		// No leaf-callback used, but there's still a database. Do serial
   197  		// insertion
   198  		db.insert(common.BytesToHash(hash), size, n)
   199  	}
   200  	return hash
   201  }
   202  
   203  // commitLoop does the actual insert + leaf callback for nodes.
   204  func (c *committer) commitLoop(db *Database) {
   205  	for item := range c.leafCh {
   206  		var (
   207  			hash = item.hash
   208  			size = item.size
   209  			n    = item.node
   210  		)
   211  		// We are pooling the trie nodes into an intermediate memory cache
   212  		db.insert(hash, size, n)
   213  
   214  		if c.onleaf != nil {
   215  			switch n := n.(type) {
   216  			case *shortNode:
   217  				if child, ok := n.Val.(valueNode); ok {
   218  					c.onleaf(nil, nil, child, hash, nil)
   219  				}
   220  			case *fullNode:
   221  				// For children in range [0, 15], it's impossible
   222  				// to contain valueNode. Only check the 17th child.
   223  				if n.Children[16] != nil {
   224  					c.onleaf(nil, nil, n.Children[16].(valueNode), hash, nil)
   225  				}
   226  			}
   227  		}
   228  	}
   229  }
   230  
   231  // estimateSize estimates the size of an rlp-encoded node, without actually
   232  // rlp-encoding it (zero allocs). This method has been experimentally tried, and with a trie
   233  // with 1000 leafs, the only errors above 1% are on small shortnodes, where this
   234  // method overestimates by 2 or 3 bytes (e.g. 37 instead of 35)
   235  func estimateSize(n node) int {
   236  	switch n := n.(type) {
   237  	case *shortNode:
   238  		// A short node contains a compacted key, and a value.
   239  		return 3 + len(n.Key) + estimateSize(n.Val)
   240  	case *fullNode:
   241  		// A full node contains up to 16 hashes (some nils), and a key
   242  		s := 3
   243  		for i := 0; i < 16; i++ {
   244  			if child := n.Children[i]; child != nil {
   245  				s += estimateSize(child)
   246  			} else {
   247  				s++
   248  			}
   249  		}
   250  		return s
   251  	case valueNode:
   252  		return 1 + len(n)
   253  	case hashNode:
   254  		return 1 + len(n)
   255  	default:
   256  		panic(fmt.Sprintf("node type %T", n))
   257  	}
   258  }