// Copyright 2018 The aquachain Authors
// This file is part of the aquachain library.
//
// The aquachain library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The aquachain library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the aquachain library. If not, see <http://www.gnu.org/licenses/>.

package trie

import (
	"sync"
	"time"

	"gitlab.com/aquachain/aquachain/aquadb"
	"gitlab.com/aquachain/aquachain/common"
	"gitlab.com/aquachain/aquachain/common/log"
)

// secureKeyPrefix is the database key prefix used to store trie node preimages.
var secureKeyPrefix = []byte("secure-key-")

// secureKeyLength is the length of the above prefix (the 11 bytes of
// "secure-key-") plus a 32 byte hash.
const secureKeyLength = 11 + 32

// DatabaseReader wraps the Get and Has methods of a backing store for the trie.
type DatabaseReader interface {
	// Get retrieves the value associated with key from the database.
	Get(key []byte) (value []byte, err error)

	// Has reports whether a key is present in the database.
	Has(key []byte) (bool, error)
}

// Database is an intermediate write layer between the trie data structures and
// the disk database. The aim is to accumulate trie writes in-memory and only
// periodically flush a couple tries to disk, garbage collecting the remainder.
46 type Database struct { 47 diskdb aquadb.Database // Persistent storage for matured trie nodes 48 49 nodes map[common.Hash]*cachedNode // Data and references relationships of a node 50 preimages map[common.Hash][]byte // Preimages of nodes from the secure trie 51 seckeybuf [secureKeyLength]byte // Ephemeral buffer for calculating preimage keys 52 53 gctime time.Duration // Time spent on garbage collection since last commit 54 gcnodes uint64 // Nodes garbage collected since last commit 55 gcsize common.StorageSize // Data storage garbage collected since last commit 56 57 nodesSize common.StorageSize // Storage size of the nodes cache 58 preimagesSize common.StorageSize // Storage size of the preimages cache 59 60 lock sync.RWMutex 61 } 62 63 // cachedNode is all the information we know about a single cached node in the 64 // memory database write layer. 65 type cachedNode struct { 66 blob []byte // Cached data block of the trie node 67 parents int // Number of live nodes referencing this one 68 children map[common.Hash]int // Children referenced by this nodes 69 } 70 71 // NewDatabase creates a new trie database to store ephemeral trie content before 72 // its written out to disk or garbage collected. 73 func NewDatabase(diskdb aquadb.Database) *Database { 74 return &Database{ 75 diskdb: diskdb, 76 nodes: map[common.Hash]*cachedNode{ 77 {}: {children: make(map[common.Hash]int)}, 78 }, 79 preimages: make(map[common.Hash][]byte), 80 } 81 } 82 83 // DiskDB retrieves the persistent storage backing the trie database. 84 func (db *Database) DiskDB() DatabaseReader { 85 return db.diskdb 86 } 87 88 // Insert writes a new trie node to the memory database if it's yet unknown. The 89 // method will make a copy of the slice. 90 func (db *Database) Insert(hash common.Hash, blob []byte) { 91 db.lock.Lock() 92 defer db.lock.Unlock() 93 94 db.insert(hash, blob) 95 } 96 97 // insert is the private locked version of Insert. 
98 func (db *Database) insert(hash common.Hash, blob []byte) { 99 if _, ok := db.nodes[hash]; ok { 100 return 101 } 102 db.nodes[hash] = &cachedNode{ 103 blob: common.CopyBytes(blob), 104 children: make(map[common.Hash]int), 105 } 106 db.nodesSize += common.StorageSize(common.HashLength + len(blob)) 107 } 108 109 // insertPreimage writes a new trie node pre-image to the memory database if it's 110 // yet unknown. The method will make a copy of the slice. 111 // 112 // Note, this method assumes that the database's lock is held! 113 func (db *Database) insertPreimage(hash common.Hash, preimage []byte) { 114 if _, ok := db.preimages[hash]; ok { 115 return 116 } 117 db.preimages[hash] = common.CopyBytes(preimage) 118 db.preimagesSize += common.StorageSize(common.HashLength + len(preimage)) 119 } 120 121 // Node retrieves a cached trie node from memory. If it cannot be found cached, 122 // the method queries the persistent database for the content. 123 func (db *Database) Node(hash common.Hash) ([]byte, error) { 124 // Retrieve the node from cache if available 125 db.lock.RLock() 126 node := db.nodes[hash] 127 db.lock.RUnlock() 128 129 if node != nil { 130 return node.blob, nil 131 } 132 // Content unavailable in memory, attempt to retrieve from disk 133 return db.diskdb.Get(hash[:]) 134 } 135 136 // preimage retrieves a cached trie node pre-image from memory. If it cannot be 137 // found cached, the method queries the persistent database for the content. 138 func (db *Database) preimage(hash common.Hash) ([]byte, error) { 139 // Retrieve the node from cache if available 140 db.lock.RLock() 141 preimage := db.preimages[hash] 142 db.lock.RUnlock() 143 144 if preimage != nil { 145 return preimage, nil 146 } 147 // Content unavailable in memory, attempt to retrieve from disk 148 return db.diskdb.Get(db.secureKey(hash[:])) 149 } 150 151 // secureKey returns the database key for the preimage of key, as an ephemeral 152 // buffer. 
The caller must not hold onto the return value because it will become 153 // invalid on the next call. 154 func (db *Database) secureKey(key []byte) []byte { 155 buf := append(db.seckeybuf[:0], secureKeyPrefix...) 156 buf = append(buf, key...) 157 return buf 158 } 159 160 // Nodes retrieves the hashes of all the nodes cached within the memory database. 161 // This method is extremely expensive and should only be used to validate internal 162 // states in test code. 163 func (db *Database) Nodes() []common.Hash { 164 db.lock.RLock() 165 defer db.lock.RUnlock() 166 167 var hashes = make([]common.Hash, 0, len(db.nodes)) 168 for hash := range db.nodes { 169 if hash != (common.Hash{}) { // Special case for "root" references/nodes 170 hashes = append(hashes, hash) 171 } 172 } 173 return hashes 174 } 175 176 // Reference adds a new reference from a parent node to a child node. 177 func (db *Database) Reference(child common.Hash, parent common.Hash) { 178 db.lock.RLock() 179 defer db.lock.RUnlock() 180 181 db.reference(child, parent) 182 } 183 184 // reference is the private locked version of Reference. 185 func (db *Database) reference(child common.Hash, parent common.Hash) { 186 // If the node does not exist, it's a node pulled from disk, skip 187 node, ok := db.nodes[child] 188 if !ok { 189 return 190 } 191 // If the reference already exists, only duplicate for roots 192 if _, ok = db.nodes[parent].children[child]; ok && parent != (common.Hash{}) { 193 return 194 } 195 node.parents++ 196 db.nodes[parent].children[child]++ 197 } 198 199 // Dereference removes an existing reference from a parent node to a child node. 
200 func (db *Database) Dereference(child common.Hash, parent common.Hash) { 201 db.lock.Lock() 202 defer db.lock.Unlock() 203 204 nodes, storage, start := len(db.nodes), db.nodesSize, time.Now() 205 db.dereference(child, parent) 206 207 db.gcnodes += uint64(nodes - len(db.nodes)) 208 db.gcsize += storage - db.nodesSize 209 db.gctime += time.Since(start) 210 211 log.Trace("Dereferenced trie from memory database", "nodes", nodes-len(db.nodes), "size", storage-db.nodesSize, "time", time.Since(start), 212 "gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.nodes), "livesize", db.nodesSize) 213 } 214 215 // dereference is the private locked version of Dereference. 216 func (db *Database) dereference(child common.Hash, parent common.Hash) { 217 // Dereference the parent-child 218 node := db.nodes[parent] 219 220 node.children[child]-- 221 if node.children[child] == 0 { 222 delete(node.children, child) 223 } 224 // If the node does not exist, it's a previously committed node. 225 node, ok := db.nodes[child] 226 if !ok { 227 return 228 } 229 // If there are no more references to the child, delete it and cascade 230 node.parents-- 231 if node.parents == 0 { 232 for hash := range node.children { 233 db.dereference(hash, child) 234 } 235 delete(db.nodes, child) 236 db.nodesSize -= common.StorageSize(common.HashLength + len(node.blob)) 237 } 238 } 239 240 // Commit iterates over all the children of a particular node, writes them out 241 // to disk, forcefully tearing down all references in both directions. 242 // 243 // As a side effect, all pre-images accumulated up to this point are also written. 244 func (db *Database) Commit(node common.Hash, report bool) error { 245 // Create a database batch to flush persistent data out. It is important that 246 // outside code doesn't see an inconsistent state (referenced data removed from 247 // memory cache during commit but not yet in persistent storage). 
This is ensured 248 // by only uncaching existing data when the database write finalizes. 249 db.lock.RLock() 250 251 start := time.Now() 252 batch := db.diskdb.NewBatch() 253 254 // Move all of the accumulated preimages into a write batch 255 for hash, preimage := range db.preimages { 256 if err := batch.Put(db.secureKey(hash[:]), preimage); err != nil { 257 log.Error("Failed to commit preimage from trie database", "err", err) 258 db.lock.RUnlock() 259 return err 260 } 261 if batch.ValueSize() > aquadb.IdealBatchSize { 262 if err := batch.Write(); err != nil { 263 return err 264 } 265 batch.Reset() 266 } 267 } 268 // Move the trie itself into the batch, flushing if enough data is accumulated 269 nodes, storage := len(db.nodes), db.nodesSize+db.preimagesSize 270 if err := db.commit(node, batch); err != nil { 271 log.Error("Failed to commit trie from trie database", "err", err) 272 db.lock.RUnlock() 273 return err 274 } 275 // Write batch ready, unlock for readers during persistence 276 if err := batch.Write(); err != nil { 277 log.Error("Failed to write trie to disk", "err", err) 278 db.lock.RUnlock() 279 return err 280 } 281 db.lock.RUnlock() 282 283 // Write successful, clear out the flushed data 284 db.lock.Lock() 285 defer db.lock.Unlock() 286 287 db.preimages = make(map[common.Hash][]byte) 288 db.preimagesSize = 0 289 290 db.uncache(node) 291 292 logger := log.Info 293 if !report { 294 logger = log.Debug 295 } 296 logger("Persisted trie from memory database", "nodes", nodes-len(db.nodes), "size", storage-db.nodesSize, "time", time.Since(start), 297 "gcnodes", db.gcnodes, "gcsize", db.gcsize, "gctime", db.gctime, "livenodes", len(db.nodes), "livesize", db.nodesSize) 298 299 // Reset the garbage collection statistics 300 db.gcnodes, db.gcsize, db.gctime = 0, 0, 0 301 302 return nil 303 } 304 305 // commit is the private locked version of Commit. 
306 func (db *Database) commit(hash common.Hash, batch aquadb.Batch) error { 307 // If the node does not exist, it's a previously committed node 308 node, ok := db.nodes[hash] 309 if !ok { 310 return nil 311 } 312 for child := range node.children { 313 if err := db.commit(child, batch); err != nil { 314 return err 315 } 316 } 317 if err := batch.Put(hash[:], node.blob); err != nil { 318 return err 319 } 320 // If we've reached an optimal match size, commit and start over 321 if batch.ValueSize() >= aquadb.IdealBatchSize { 322 if err := batch.Write(); err != nil { 323 return err 324 } 325 batch.Reset() 326 } 327 return nil 328 } 329 330 // uncache is the post-processing step of a commit operation where the already 331 // persisted trie is removed from the cache. The reason behind the two-phase 332 // commit is to ensure consistent data availability while moving from memory 333 // to disk. 334 func (db *Database) uncache(hash common.Hash) { 335 // If the node does not exist, we're done on this path 336 node, ok := db.nodes[hash] 337 if !ok { 338 return 339 } 340 // Otherwise uncache the node's subtries and remove the node itself too 341 for child := range node.children { 342 db.uncache(child) 343 } 344 delete(db.nodes, hash) 345 db.nodesSize -= common.StorageSize(common.HashLength + len(node.blob)) 346 } 347 348 // Size returns the current storage size of the memory cache in front of the 349 // persistent database layer. 350 func (db *Database) Size() common.StorageSize { 351 db.lock.RLock() 352 defer db.lock.RUnlock() 353 354 return db.nodesSize + db.preimagesSize 355 }