github.com/aquanetwork/aquachain@v1.7.8/trie/database.go (about)

     1  // Copyright 2018 The aquachain Authors
     2  // This file is part of the aquachain library.
     3  //
     4  // The aquachain library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The aquachain library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the aquachain library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package trie
    18  
    19  import (
    20  	"sync"
    21  	"time"
    22  
    23  	"gitlab.com/aquachain/aquachain/aquadb"
    24  	"gitlab.com/aquachain/aquachain/common"
    25  	"gitlab.com/aquachain/aquachain/common/log"
    26  )
    27  
    28  // secureKeyPrefix is the database key prefix under which trie node preimages are stored.
    29  var secureKeyPrefix = []byte("secure-key-")
    30  
    31  // secureKeyLength is the size of a preimage key: the 11-byte prefix above plus a 32-byte hash.
    32  const secureKeyLength = 11 + 32
    33  
    34  // DatabaseReader wraps the Get and Has methods of a backing store for the trie.
    35  type DatabaseReader interface {
    36  	// Get retrieves the value associated with key from the database.
    37  	Get(key []byte) (value []byte, err error)
    38  
    39  	// Has reports whether a key is present in the database.
    40  	Has(key []byte) (bool, error)
    41  }
    42  
    43  // Database is an intermediate write layer between the trie data structures and
    44  // the disk database. It accumulates trie writes in memory and only periodically
    45  // flushes a few tries to disk, garbage collecting the remainder.
    46  type Database struct {
    47  	diskdb aquadb.Database // Persistent storage for matured trie nodes
    48  
    49  	nodes     map[common.Hash]*cachedNode // Cached nodes by hash with their reference relationships; the zero hash entry holds root references
    50  	preimages map[common.Hash][]byte      // Preimages of nodes from the secure trie
    51  	seckeybuf [secureKeyLength]byte       // Scratch buffer reused by secureKey; every call overwrites the previous key
    52  
    53  	gctime  time.Duration      // Time spent on garbage collection since last commit
    54  	gcnodes uint64             // Nodes garbage collected since last commit
    55  	gcsize  common.StorageSize // Data storage garbage collected since last commit
    56  
    57  	nodesSize     common.StorageSize // Storage size of the nodes cache
    58  	preimagesSize common.StorageSize // Storage size of the preimages cache
    59  
    60  	lock sync.RWMutex // Taken by the exported methods around access to the caches and counters above
    61  }
    62  
    63  // cachedNode is all the information we know about a single cached node in the
    64  // memory database write layer.
    65  type cachedNode struct {
    66  	blob     []byte              // Cached data block of the trie node
    67  	parents  int                 // Number of live nodes referencing this one
    68  	children map[common.Hash]int // Children referenced by this node, mapped to their reference counts
    69  }
    70  
    71  // NewDatabase creates a new trie database to store ephemeral trie content before
    72  // its written out to disk or garbage collected.
    73  func NewDatabase(diskdb aquadb.Database) *Database {
    74  	return &Database{
    75  		diskdb: diskdb,
    76  		nodes: map[common.Hash]*cachedNode{
    77  			{}: {children: make(map[common.Hash]int)},
    78  		},
    79  		preimages: make(map[common.Hash][]byte),
    80  	}
    81  }
    82  
    83  // DiskDB retrieves the persistent storage backing the trie database.
    84  func (db *Database) DiskDB() DatabaseReader {
    85  	return db.diskdb
    86  }
    87  
    88  // Insert writes a new trie node to the memory database if it's yet unknown. The
    89  // method will make a copy of the slice.
    90  func (db *Database) Insert(hash common.Hash, blob []byte) {
    91  	db.lock.Lock()
    92  	defer db.lock.Unlock()
    93  
    94  	db.insert(hash, blob)
    95  }
    96  
    97  // insert is the private locked version of Insert.
    98  func (db *Database) insert(hash common.Hash, blob []byte) {
    99  	if _, ok := db.nodes[hash]; ok {
   100  		return
   101  	}
   102  	db.nodes[hash] = &cachedNode{
   103  		blob:     common.CopyBytes(blob),
   104  		children: make(map[common.Hash]int),
   105  	}
   106  	db.nodesSize += common.StorageSize(common.HashLength + len(blob))
   107  }
   108  
   109  // insertPreimage writes a new trie node pre-image to the memory database if it's
   110  // yet unknown. The method will make a copy of the slice.
   111  //
   112  // Note, this method assumes that the database's lock is held!
   113  func (db *Database) insertPreimage(hash common.Hash, preimage []byte) {
   114  	if _, ok := db.preimages[hash]; ok {
   115  		return
   116  	}
   117  	db.preimages[hash] = common.CopyBytes(preimage)
   118  	db.preimagesSize += common.StorageSize(common.HashLength + len(preimage))
   119  }
   120  
   121  // Node retrieves a cached trie node from memory. If it cannot be found cached,
   122  // the method queries the persistent database for the content.
   123  func (db *Database) Node(hash common.Hash) ([]byte, error) {
   124  	// Retrieve the node from cache if available
   125  	db.lock.RLock()
   126  	node := db.nodes[hash]
   127  	db.lock.RUnlock()
   128  
   129  	if node != nil {
   130  		return node.blob, nil
   131  	}
   132  	// Content unavailable in memory, attempt to retrieve from disk
   133  	return db.diskdb.Get(hash[:])
   134  }
   135  
   136  // preimage retrieves a cached trie node pre-image from memory. If it cannot be
   137  // found cached, the method queries the persistent database for the content.
   138  func (db *Database) preimage(hash common.Hash) ([]byte, error) {
   139  	// Retrieve the node from cache if available
   140  	db.lock.RLock()
   141  	preimage := db.preimages[hash]
   142  	db.lock.RUnlock()
   143  
   144  	if preimage != nil {
   145  		return preimage, nil
   146  	}
   147  	// Content unavailable in memory, attempt to retrieve from disk
   148  	return db.diskdb.Get(db.secureKey(hash[:]))
   149  }
   150  
   151  // secureKey returns the database key for the preimage of key, as an ephemeral
   152  // buffer. The caller must not hold onto the return value because it will become
   153  // invalid on the next call.
   154  func (db *Database) secureKey(key []byte) []byte {
   155  	buf := append(db.seckeybuf[:0], secureKeyPrefix...)
   156  	buf = append(buf, key...)
   157  	return buf
   158  }
   159  
   160  // Nodes retrieves the hashes of all the nodes cached within the memory database.
   161  // This method is extremely expensive and should only be used to validate internal
   162  // states in test code.
   163  func (db *Database) Nodes() []common.Hash {
   164  	db.lock.RLock()
   165  	defer db.lock.RUnlock()
   166  
   167  	var hashes = make([]common.Hash, 0, len(db.nodes))
   168  	for hash := range db.nodes {
   169  		if hash != (common.Hash{}) { // Special case for "root" references/nodes
   170  			hashes = append(hashes, hash)
   171  		}
   172  	}
   173  	return hashes
   174  }
   175  
   176  // Reference adds a new reference from a parent node to a child node.
   177  func (db *Database) Reference(child common.Hash, parent common.Hash) {
   178  	db.lock.RLock()
   179  	defer db.lock.RUnlock()
   180  
   181  	db.reference(child, parent)
   182  }
   183  
   184  // reference is the private locked version of Reference.
   185  func (db *Database) reference(child common.Hash, parent common.Hash) {
   186  	// If the node does not exist, it's a node pulled from disk, skip
   187  	node, ok := db.nodes[child]
   188  	if !ok {
   189  		return
   190  	}
   191  	// If the reference already exists, only duplicate for roots
   192  	if _, ok = db.nodes[parent].children[child]; ok && parent != (common.Hash{}) {
   193  		return
   194  	}
   195  	node.parents++
   196  	db.nodes[parent].children[child]++
   197  }
   198  
   199  // Dereference removes an existing reference from a parent node to a child node.
   200  func (db *Database) Dereference(child common.Hash, parent common.Hash) {
   201  	db.lock.Lock()
   202  	defer db.lock.Unlock()
   203  
   204  	nodes, storage, start := len(db.nodes), db.nodesSize, time.Now()
   205  	db.dereference(child, parent)
   206  
   207  	db.gcnodes += uint64(nodes - len(db.nodes))
   208  	db.gcsize += storage - db.nodesSize
   209  	db.gctime += time.Since(start)
   210  
   211  	log.Trace("Dereferenced trie from memory database", "nodes", nodes-len(db.nodes), "size", storage-db.nodesSize, "time", time.Since(start),
   212  		"gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.nodes), "livesize", db.nodesSize)
   213  }
   214  
   215  // dereference is the private locked version of Dereference.
   216  func (db *Database) dereference(child common.Hash, parent common.Hash) {
   217  	// Dereference the parent-child
   218  	node := db.nodes[parent]
   219  
   220  	node.children[child]--
   221  	if node.children[child] == 0 {
   222  		delete(node.children, child)
   223  	}
   224  	// If the node does not exist, it's a previously committed node.
   225  	node, ok := db.nodes[child]
   226  	if !ok {
   227  		return
   228  	}
   229  	// If there are no more references to the child, delete it and cascade
   230  	node.parents--
   231  	if node.parents == 0 {
   232  		for hash := range node.children {
   233  			db.dereference(hash, child)
   234  		}
   235  		delete(db.nodes, child)
   236  		db.nodesSize -= common.StorageSize(common.HashLength + len(node.blob))
   237  	}
   238  }
   239  
   240  // Commit iterates over all the children of a particular node, writes them out
   241  // to disk, forcefully tearing down all references in both directions.
   242  //
   243  // As a side effect, all pre-images accumulated up to this point are also written.
   244  func (db *Database) Commit(node common.Hash, report bool) error {
   245  	// Create a database batch to flush persistent data out. It is important that
   246  	// outside code doesn't see an inconsistent state (referenced data removed from
   247  	// memory cache during commit but not yet in persistent storage). This is ensured
   248  	// by only uncaching existing data when the database write finalizes.
   249  	db.lock.RLock()
   250  
   251  	start := time.Now()
   252  	batch := db.diskdb.NewBatch()
   253  
   254  	// Move all of the accumulated preimages into a write batch
   255  	for hash, preimage := range db.preimages {
   256  		if err := batch.Put(db.secureKey(hash[:]), preimage); err != nil {
   257  			log.Error("Failed to commit preimage from trie database", "err", err)
   258  			db.lock.RUnlock()
   259  			return err
   260  		}
   261  		if batch.ValueSize() > aquadb.IdealBatchSize {
   262  			if err := batch.Write(); err != nil {
   263  				return err
   264  			}
   265  			batch.Reset()
   266  		}
   267  	}
   268  	// Move the trie itself into the batch, flushing if enough data is accumulated
   269  	nodes, storage := len(db.nodes), db.nodesSize+db.preimagesSize
   270  	if err := db.commit(node, batch); err != nil {
   271  		log.Error("Failed to commit trie from trie database", "err", err)
   272  		db.lock.RUnlock()
   273  		return err
   274  	}
   275  	// Write batch ready, unlock for readers during persistence
   276  	if err := batch.Write(); err != nil {
   277  		log.Error("Failed to write trie to disk", "err", err)
   278  		db.lock.RUnlock()
   279  		return err
   280  	}
   281  	db.lock.RUnlock()
   282  
   283  	// Write successful, clear out the flushed data
   284  	db.lock.Lock()
   285  	defer db.lock.Unlock()
   286  
   287  	db.preimages = make(map[common.Hash][]byte)
   288  	db.preimagesSize = 0
   289  
   290  	db.uncache(node)
   291  
   292  	logger := log.Info
   293  	if !report {
   294  		logger = log.Debug
   295  	}
   296  	logger("Persisted trie from memory database", "nodes", nodes-len(db.nodes), "size", storage-db.nodesSize, "time", time.Since(start),
   297  		"gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.nodes), "livesize", db.nodesSize)
   298  
   299  	// Reset the garbage collection statistics
   300  	db.gcnodes, db.gcsize, db.gctime = 0, 0, 0
   301  
   302  	return nil
   303  }
   304  
   305  // commit is the private locked version of Commit.
   306  func (db *Database) commit(hash common.Hash, batch aquadb.Batch) error {
   307  	// If the node does not exist, it's a previously committed node
   308  	node, ok := db.nodes[hash]
   309  	if !ok {
   310  		return nil
   311  	}
   312  	for child := range node.children {
   313  		if err := db.commit(child, batch); err != nil {
   314  			return err
   315  		}
   316  	}
   317  	if err := batch.Put(hash[:], node.blob); err != nil {
   318  		return err
   319  	}
   320  	// If we've reached an optimal match size, commit and start over
   321  	if batch.ValueSize() >= aquadb.IdealBatchSize {
   322  		if err := batch.Write(); err != nil {
   323  			return err
   324  		}
   325  		batch.Reset()
   326  	}
   327  	return nil
   328  }
   329  
   330  // uncache is the post-processing step of a commit operation where the already
   331  // persisted trie is removed from the cache. The reason behind the two-phase
   332  // commit is to ensure consistent data availability while moving from memory
   333  // to disk.
   334  func (db *Database) uncache(hash common.Hash) {
   335  	// If the node does not exist, we're done on this path
   336  	node, ok := db.nodes[hash]
   337  	if !ok {
   338  		return
   339  	}
   340  	// Otherwise uncache the node's subtries and remove the node itself too
   341  	for child := range node.children {
   342  		db.uncache(child)
   343  	}
   344  	delete(db.nodes, hash)
   345  	db.nodesSize -= common.StorageSize(common.HashLength + len(node.blob))
   346  }
   347  
   348  // Size returns the current storage size of the memory cache in front of the
   349  // persistent database layer.
   350  func (db *Database) Size() common.StorageSize {
   351  	db.lock.RLock()
   352  	defer db.lock.RUnlock()
   353  
   354  	return db.nodesSize + db.preimagesSize
   355  }