github.com/klaytn/klaytn@v1.12.1/storage/statedb/hasher.go (about)

     1  // Modifications Copyright 2018 The klaytn Authors
     2  // Copyright 2015 The go-ethereum Authors
     3  // This file is part of the go-ethereum library.
     4  //
     5  // The go-ethereum library is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Lesser General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // The go-ethereum library is distributed in the hope that it will be useful,
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    13  // GNU Lesser General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Lesser General Public License
    16  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    17  //
    18  // This file is derived from trie/hasher.go (2018/06/04).
    19  // Modified and improved for the klaytn development.
    20  
    21  package statedb
    22  
    23  import (
    24  	"hash"
    25  	"sync"
    26  
    27  	"github.com/klaytn/klaytn/blockchain/types/account"
    28  	"github.com/klaytn/klaytn/common"
    29  	"github.com/klaytn/klaytn/crypto/sha3"
    30  	"github.com/klaytn/klaytn/rlp"
    31  )
    32  
// hasherOpts configures how a hasher assigns ExtHash nonces and reports leaves.
type hasherOpts struct {
	onleaf      LeafCallback // invoked from store() for each valueNode child when a db is given; nil disables leaf tracking.
	pruning     bool         // If pruning is true, non-root nodes are attached a fresh nonce.
	storageRoot bool         // If both pruning and storageRoot are true, the root node is attached a fresh nonce.
}
    38  
// hasher folds trie nodes into their hashes, optionally persisting them to a
// Database. It carries reusable scratch state and is recycled via hasherPool.
type hasher struct {
	hasherOpts
	tmp    sliceBuffer       // scratch buffer holding the most recent RLP encoding (see encodedBytes)
	sha    KeccakState       // reusable Keccak-256 state
	encbuf rlp.EncoderBuffer // reusable RLP encoder buffer
}
    45  
// KeccakState wraps sha3.state. In addition to the usual hash methods, it also supports
// Read to get a variable amount of data from the hash state. Read is faster than Sum
// because it doesn't copy the internal state, but also modifies the internal state.
type KeccakState interface {
	hash.Hash
	Read([]byte) (int, error)
}
    53  
    54  type sliceBuffer []byte
    55  
    56  func (b *sliceBuffer) Write(data []byte) (n int, err error) {
    57  	*b = append(*b, data...)
    58  	return len(data), nil
    59  }
    60  
    61  func (b *sliceBuffer) Reset() {
    62  	*b = (*b)[:0]
    63  }
    64  
// hasherPool recycles hasher instances (with their scratch buffers and Keccak
// state) across hashing operations to avoid per-operation allocations.
// hashers live in a global pool shared by all tries.
var hasherPool = sync.Pool{
	New: func() interface{} {
		return &hasher{
			tmp:    make(sliceBuffer, 0, 550), // cap is as large as a full fullNode.
			sha:    sha3.NewKeccak256().(KeccakState),
			encbuf: rlp.NewEncoderBuffer(nil),
		}
	},
}
    75  
    76  func newHasher(opts *hasherOpts) *hasher {
    77  	h := hasherPool.Get().(*hasher)
    78  	if opts == nil {
    79  		opts = &hasherOpts{}
    80  	}
    81  	h.hasherOpts = *opts
    82  	return h
    83  }
    84  
// returnHasherToPool releases h back to the shared pool for reuse.
// h must not be used after this call.
func returnHasherToPool(h *hasher) {
	hasherPool.Put(h)
}
    88  
// hashRoot is similar to hashNode() but adds special treatment for the root node:
// its 16 branches are hashed in parallel (see hashChildren) and its ExtHash nonce
// policy differs under pruning (see hashData).
func (h *hasher) hashRoot(n node, db *Database, force bool) (node, node) {
	return h.hashNode(n, db, force, true)
}
    93  
// hash is similar to hashNode() but assumes that the node is not a root node.
func (h *hasher) hash(n node, db *Database, force bool) (node, node) {
	return h.hashNode(n, db, force, false)
}
    98  
// hashNode collapses a node down into a hash node, also returning a copy of the
// original node initialized with the computed hash to replace the original one.
//
// It returns (hashed, cached): hashed is the collapsed form (a hashNode, or n
// itself when it encodes to fewer than 32 bytes and force is false — see store),
// and cached is a copy of n with the computed hash and encoded length memoized
// in its node flags.
//
// hashNode is for hasher's internal use only.
// Please use hashRoot() or hash() for readability.
func (h *hasher) hashNode(n node, db *Database, force bool, onRoot bool) (node, node) {
	// If we're not storing the node, just hashing, use available cached data
	if hash, dirty := n.cache(); hash != nil {
		if db == nil {
			// Hash-only mode: the memoized hash is sufficient.
			return hash, n
		}
		if !dirty {
			// Already persisted: clean full/short nodes can be replaced by
			// their hash entirely; other node kinds are kept as-is.
			switch n.(type) {
			case *fullNode, *shortNode:
				return hash, hash
			default:
				return hash, n
			}
		}
	}
	// Trie not processed yet or needs storage, walk the children
	collapsed, cached := h.hashChildren(n, db, onRoot)
	hashed, lenEncoded := h.store(collapsed, db, force, onRoot)
	// Cache the hash of the node for later reuse and remove
	// the dirty flag in commit mode. It's fine to assign these values directly
	// without copying the node first because hashChildren copies it.
	cachedHash, _ := hashed.(hashNode)
	switch cn := cached.(type) {
	case *shortNode:
		cn.flags.hash = cachedHash
		cn.flags.lenEncoded = lenEncoded
		if db != nil {
			cn.flags.dirty = false
		}
	case *fullNode:
		cn.flags.hash = cachedHash
		cn.flags.lenEncoded = lenEncoded
		if db != nil {
			cn.flags.dirty = false
		}
	}
	return hashed, cached
}
   142  
// hashChildren replaces the children of a node with their hashes if the encoded
// size of the child is larger than a hash, returning the collapsed node as well
// as a replacement for the original node with the child hashes cached in.
func (h *hasher) hashChildren(original node, db *Database, onRoot bool) (node, node) {
	switch n := original.(type) {
	case *shortNode:
		// Hash the short node's child, caching the newly hashed subtree
		collapsed, cached := n.copy(), n.copy()
		// collapsed carries the compact (hex-prefix) key used for encoding;
		// cached keeps a copy of the hex key for in-memory use.
		collapsed.Key = hexToCompact(n.Key)
		cached.Key = common.CopyBytes(n.Key)

		// valueNode children are left in place; everything else is hashed.
		if _, ok := n.Val.(valueNode); !ok {
			collapsed.Val, cached.Val = h.hash(n.Val, db, false)
		}
		return collapsed, cached

	case *fullNode:
		// Hash the full node's children, caching the newly hashed subtrees
		collapsed, cached := n.copy(), n.copy()

		if onRoot {
			// At the root, hash the 16 branches concurrently, each goroutine
			// using its own pooled hasher (sharing h.hasherOpts). Goroutines
			// write to disjoint indices of collapsed/cached, so no extra
			// synchronization beyond the WaitGroup is needed.
			var wg sync.WaitGroup
			wg.Add(16)
			for i := 0; i < 16; i++ {
				if n.Children[i] != nil {
					go func(i int) {
						childHasher := newHasher(&h.hasherOpts)
						collapsed.Children[i], cached.Children[i] = childHasher.hash(n.Children[i], db, false)
						returnHasherToPool(childHasher)
						wg.Done()
					}(i)
				} else {
					wg.Done()
				}
			}
			wg.Wait()
		} else {
			// Below the root, hash children sequentially with this hasher.
			for i := 0; i < 16; i++ {
				if n.Children[i] != nil {
					collapsed.Children[i], cached.Children[i] = h.hash(n.Children[i], db, false)
				}
			}
		}
		// Children[16] (the value slot) is carried over verbatim.
		cached.Children[16] = n.Children[16]
		return collapsed, cached

	default:
		// Value and hash nodes don't have children so they're left as were
		return n, original
	}
}
   194  
// store hashes the node n and if we have a storage layer specified, it writes
// the key/value pair to it and tracks any node->child references as well as any
// node->external trie references.
//
// It returns the node's replacement (a hashNode, or n itself when its encoding
// is shorter than 32 bytes and force is false) together with the length of the
// storage encoding for Database size accounting.
func (h *hasher) store(n node, db *Database, force bool, onRoot bool) (node, uint16) {
	// Don't store hashes or empty nodes.
	if _, isHash := n.(hashNode); n == nil || isHash {
		return n, 0
	}
	// hash is for the merkle proof. hash = Keccak(rlp.Encode(nodeForHashing(n)))
	// lenEncoded is for Database size accounting. lenEncoded = len(rlp.Encode(nodeForStoring(n)))
	hash, _ := n.cache()
	lenEncoded := n.lenEncoded()

	// Calculate lenEncoded if not set
	if hash == nil || lenEncoded == 0 {
		// Generate the RLP encoding of the node for database storing
		h.nodeForStoring(n).encode(h.encbuf)
		enc := h.encodedBytes()
		lenEncoded = uint16(len(enc))
	}
	if lenEncoded < 32 && !force {
		return n, lenEncoded // Nodes smaller than 32 bytes are stored inside their parent
	}

	// Calculate hash if not set
	if hash == nil {
		// Generate the RLP encoding of the node for Merkle hashing
		h.nodeForHashing(n).encode(h.encbuf)
		enc := h.encodedBytes()
		hash = h.hashData(enc, onRoot)
	}

	if db != nil {
		// We are pooling the trie nodes into an intermediate memory cache.
		// NOTE: deliberately shadows the outer hash with its ExtHash form,
		// which serves as the database key below.
		hash := common.BytesToExtHash(hash)

		db.lock.Lock()
		db.insert(hash, lenEncoded, h.nodeForStoring(n))
		db.lock.Unlock()

		// Track external references from account->storage trie
		if h.onleaf != nil {
			switch n := n.(type) {
			case *shortNode:
				if child, ok := n.Val.(valueNode); ok {
					h.onleaf(nil, nil, child, hash, 0)
				}
			case *fullNode:
				for i := 0; i < 16; i++ {
					if child, ok := n.Children[i].(valueNode); ok {
						h.onleaf(nil, nil, child, hash, 0)
					}
				}
			}
		}
	}
	return hash, lenEncoded
}
   253  
   254  func (h *hasher) hashData(data []byte, onRoot bool) hashNode {
   255  	var hash common.Hash
   256  	h.sha.Reset()
   257  	h.sha.Write(data)
   258  	h.sha.Read(hash[:])
   259  	if h.pruning && (h.storageRoot || !onRoot) {
   260  		return hash.Extend().Bytes()
   261  	} else {
   262  		return hash.ExtendZero().Bytes()
   263  	}
   264  }
   265  
// encodedBytes returns the result of the last encoding operation on h.encbuf.
// This also resets the encoder buffer.
//
// The returned slice aliases h.tmp, so it is only valid until the next call
// to encodedBytes on this hasher.
func (h *hasher) encodedBytes() []byte {
	h.tmp = h.encbuf.AppendToBytes(h.tmp[:0])
	h.encbuf.Reset(nil)
	return h.tmp
}
   273  
// nodeForHashing returns a copy of original with every ExtHash extension
// stripped, yielding the canonical form whose RLP encoding is Merkle-hashed.
func (h *hasher) nodeForHashing(original node) node {
	return unextendNode(original, false)
}
   277  
// nodeForStoring returns a copy of original prepared for database storage:
// nonzero ExtHash extensions are preserved, zero extensions are stripped.
func (h *hasher) nodeForStoring(original node) node {
	return unextendNode(original, true)
}
   281  
   282  func unextendNode(original node, preserveExtHash bool) node {
   283  	switch n := original.(type) {
   284  	case *shortNode:
   285  		stored := n.copy()
   286  		stored.Val = unextendNode(n.Val, preserveExtHash)
   287  		return stored
   288  	case *fullNode:
   289  		stored := n.copy()
   290  		for i, child := range stored.Children {
   291  			stored.Children[i] = unextendNode(child, preserveExtHash)
   292  		}
   293  		return stored
   294  	case hashNode:
   295  		exthash := common.BytesToExtHash(n)
   296  		if exthash.IsZeroExtended() { // Always unextend zero extensions
   297  			return hashNode(exthash.Unextend().Bytes())
   298  		} else if !preserveExtHash { // We're given an ExtHash and will strip extension for merkle hash
   299  			return hashNode(exthash.Unextend().Bytes())
   300  		} else { // We're given an ExtHash and will preserve extension for storing
   301  			return n
   302  		}
   303  	case valueNode:
   304  		if !preserveExtHash {
   305  			return valueNode(account.UnextendSerializedAccount(n))
   306  		} else {
   307  			// Zero extensions should have been unextended by AccountSerializer,
   308  			// hence no need to check IsZeroExtended() here.
   309  			return n
   310  		}
   311  	default:
   312  		return n
   313  	}
   314  }