github.com/calmw/ethereum@v0.1.1/trie/trie.go (about) 1 // Copyright 2014 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 // Package trie implements Merkle Patricia Tries. 18 package trie 19 20 import ( 21 "bytes" 22 "errors" 23 "fmt" 24 25 "github.com/calmw/ethereum/common" 26 "github.com/calmw/ethereum/core/types" 27 "github.com/calmw/ethereum/log" 28 "github.com/calmw/ethereum/trie/trienode" 29 ) 30 31 // Trie is a Merkle Patricia Trie. Use New to create a trie that sits on 32 // top of a database. Whenever trie performs a commit operation, the generated 33 // nodes will be gathered and returned in a set. Once the trie is committed, 34 // it's not usable anymore. Callers have to re-create the trie with new root 35 // based on the updated trie database. 36 // 37 // Trie is not safe for concurrent use. 38 type Trie struct { 39 root node 40 owner common.Hash 41 42 // Keep track of the number leaves which have been inserted since the last 43 // hashing operation. This number will not directly map to the number of 44 // actually unhashed nodes. 45 unhashed int 46 47 // reader is the handler trie can retrieve nodes from. 48 reader *trieReader 49 50 // tracer is the tool to track the trie changes. 
51 // It will be reset after each commit operation. 52 tracer *tracer 53 } 54 55 // newFlag returns the cache flag value for a newly created node. 56 func (t *Trie) newFlag() nodeFlag { 57 return nodeFlag{dirty: true} 58 } 59 60 // Copy returns a copy of Trie. 61 func (t *Trie) Copy() *Trie { 62 return &Trie{ 63 root: t.root, 64 owner: t.owner, 65 unhashed: t.unhashed, 66 reader: t.reader, 67 tracer: t.tracer.copy(), 68 } 69 } 70 71 // New creates the trie instance with provided trie id and the read-only 72 // database. The state specified by trie id must be available, otherwise 73 // an error will be returned. The trie root specified by trie id can be 74 // zero hash or the sha3 hash of an empty string, then trie is initially 75 // empty, otherwise, the root node must be present in database or returns 76 // a MissingNodeError if not. 77 func New(id *ID, db NodeReader) (*Trie, error) { 78 reader, err := newTrieReader(id.StateRoot, id.Owner, db) 79 if err != nil { 80 return nil, err 81 } 82 trie := &Trie{ 83 owner: id.Owner, 84 reader: reader, 85 tracer: newTracer(), 86 } 87 if id.Root != (common.Hash{}) && id.Root != types.EmptyRootHash { 88 rootnode, err := trie.resolveAndTrack(id.Root[:], nil) 89 if err != nil { 90 return nil, err 91 } 92 trie.root = rootnode 93 } 94 return trie, nil 95 } 96 97 // NewEmpty is a shortcut to create empty tree. It's mostly used in tests. 98 func NewEmpty(db *Database) *Trie { 99 tr, _ := New(TrieID(types.EmptyRootHash), db) 100 return tr 101 } 102 103 // NodeIterator returns an iterator that returns nodes of the trie. Iteration starts at 104 // the key after the given start key. 105 func (t *Trie) NodeIterator(start []byte) NodeIterator { 106 return newNodeIterator(t, start) 107 } 108 109 // MustGet is a wrapper of Get and will omit any encountered error but just 110 // print out an error message. 
111 func (t *Trie) MustGet(key []byte) []byte { 112 res, err := t.Get(key) 113 if err != nil { 114 log.Error("Unhandled trie error in Trie.Get", "err", err) 115 } 116 return res 117 } 118 119 // Get returns the value for key stored in the trie. 120 // The value bytes must not be modified by the caller. 121 // 122 // If the requested node is not present in trie, no error will be returned. 123 // If the trie is corrupted, a MissingNodeError is returned. 124 func (t *Trie) Get(key []byte) ([]byte, error) { 125 value, newroot, didResolve, err := t.get(t.root, keybytesToHex(key), 0) 126 if err == nil && didResolve { 127 t.root = newroot 128 } 129 return value, err 130 } 131 132 func (t *Trie) get(origNode node, key []byte, pos int) (value []byte, newnode node, didResolve bool, err error) { 133 switch n := (origNode).(type) { 134 case nil: 135 return nil, nil, false, nil 136 case valueNode: 137 return n, n, false, nil 138 case *shortNode: 139 if len(key)-pos < len(n.Key) || !bytes.Equal(n.Key, key[pos:pos+len(n.Key)]) { 140 // key not found in trie 141 return nil, n, false, nil 142 } 143 value, newnode, didResolve, err = t.get(n.Val, key, pos+len(n.Key)) 144 if err == nil && didResolve { 145 n = n.copy() 146 n.Val = newnode 147 } 148 return value, n, didResolve, err 149 case *fullNode: 150 value, newnode, didResolve, err = t.get(n.Children[key[pos]], key, pos+1) 151 if err == nil && didResolve { 152 n = n.copy() 153 n.Children[key[pos]] = newnode 154 } 155 return value, n, didResolve, err 156 case hashNode: 157 child, err := t.resolveAndTrack(n, key[:pos]) 158 if err != nil { 159 return nil, n, true, err 160 } 161 value, newnode, _, err := t.get(child, key, pos) 162 return value, newnode, true, err 163 default: 164 panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode)) 165 } 166 } 167 168 // MustGetNode is a wrapper of GetNode and will omit any encountered error but 169 // just print out an error message. 
170 func (t *Trie) MustGetNode(path []byte) ([]byte, int) { 171 item, resolved, err := t.GetNode(path) 172 if err != nil { 173 log.Error("Unhandled trie error in Trie.GetNode", "err", err) 174 } 175 return item, resolved 176 } 177 178 // GetNode retrieves a trie node by compact-encoded path. It is not possible 179 // to use keybyte-encoding as the path might contain odd nibbles. 180 // 181 // If the requested node is not present in trie, no error will be returned. 182 // If the trie is corrupted, a MissingNodeError is returned. 183 func (t *Trie) GetNode(path []byte) ([]byte, int, error) { 184 item, newroot, resolved, err := t.getNode(t.root, compactToHex(path), 0) 185 if err != nil { 186 return nil, resolved, err 187 } 188 if resolved > 0 { 189 t.root = newroot 190 } 191 if item == nil { 192 return nil, resolved, nil 193 } 194 return item, resolved, nil 195 } 196 197 func (t *Trie) getNode(origNode node, path []byte, pos int) (item []byte, newnode node, resolved int, err error) { 198 // If non-existent path requested, abort 199 if origNode == nil { 200 return nil, nil, 0, nil 201 } 202 // If we reached the requested path, return the current node 203 if pos >= len(path) { 204 // Although we most probably have the original node expanded, encoding 205 // that into consensus form can be nasty (needs to cascade down) and 206 // time consuming. Instead, just pull the hash up from disk directly. 
207 var hash hashNode 208 if node, ok := origNode.(hashNode); ok { 209 hash = node 210 } else { 211 hash, _ = origNode.cache() 212 } 213 if hash == nil { 214 return nil, origNode, 0, errors.New("non-consensus node") 215 } 216 blob, err := t.reader.node(path, common.BytesToHash(hash)) 217 return blob, origNode, 1, err 218 } 219 // Path still needs to be traversed, descend into children 220 switch n := (origNode).(type) { 221 case valueNode: 222 // Path prematurely ended, abort 223 return nil, nil, 0, nil 224 225 case *shortNode: 226 if len(path)-pos < len(n.Key) || !bytes.Equal(n.Key, path[pos:pos+len(n.Key)]) { 227 // Path branches off from short node 228 return nil, n, 0, nil 229 } 230 item, newnode, resolved, err = t.getNode(n.Val, path, pos+len(n.Key)) 231 if err == nil && resolved > 0 { 232 n = n.copy() 233 n.Val = newnode 234 } 235 return item, n, resolved, err 236 237 case *fullNode: 238 item, newnode, resolved, err = t.getNode(n.Children[path[pos]], path, pos+1) 239 if err == nil && resolved > 0 { 240 n = n.copy() 241 n.Children[path[pos]] = newnode 242 } 243 return item, n, resolved, err 244 245 case hashNode: 246 child, err := t.resolveAndTrack(n, path[:pos]) 247 if err != nil { 248 return nil, n, 1, err 249 } 250 item, newnode, resolved, err := t.getNode(child, path, pos) 251 return item, newnode, resolved + 1, err 252 253 default: 254 panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode)) 255 } 256 } 257 258 // MustUpdate is a wrapper of Update and will omit any encountered error but 259 // just print out an error message. 260 func (t *Trie) MustUpdate(key, value []byte) { 261 if err := t.Update(key, value); err != nil { 262 log.Error("Unhandled trie error in Trie.Update", "err", err) 263 } 264 } 265 266 // Update associates key with value in the trie. Subsequent calls to 267 // Get will return value. If value has length zero, any existing value 268 // is deleted from the trie and calls to Get will return nil. 
//
// The value bytes must not be modified by the caller while they are
// stored in the trie.
//
// If the requested node is not present in trie, no error will be returned.
// If the trie is corrupted, a MissingNodeError is returned.
func (t *Trie) Update(key, value []byte) error {
	return t.update(key, value)
}

// update bumps the unhashed counter, converts key to its hex-nibble form and
// then either inserts value (non-empty value) or deletes the key (empty
// value). The trie root is only replaced when the operation succeeds; on
// error the previous root is left in place.
func (t *Trie) update(key, value []byte) error {
	t.unhashed++
	k := keybytesToHex(key)
	if len(value) != 0 {
		_, n, err := t.insert(t.root, nil, k, valueNode(value))
		if err != nil {
			return err
		}
		t.root = n
	} else {
		_, n, err := t.delete(t.root, nil, k)
		if err != nil {
			return err
		}
		t.root = n
	}
	return nil
}

// insert stores value under the remaining key in the subtrie rooted at n.
// prefix is the path leading from the root to n; it is what gets handed to
// the tracer and to resolveAndTrack as the node identifier. The results are:
// whether the subtrie was modified, the node that takes n's place, and any
// error hit while resolving a hash node.
func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error) {
	if len(key) == 0 {
		// The key is exhausted. If a value already sits here, the subtrie
		// is only dirty when the new value differs from it.
		if v, ok := n.(valueNode); ok {
			return !bytes.Equal(v, value.(valueNode)), value, nil
		}
		return true, value, nil
	}
	switch n := n.(type) {
	case *shortNode:
		matchlen := prefixLen(key, n.Key)
		// If the whole key matches, keep this short node as is
		// and only update the value.
		if matchlen == len(n.Key) {
			dirty, nn, err := t.insert(n.Val, append(prefix, key[:matchlen]...), key[matchlen:], value)
			if !dirty || err != nil {
				return false, n, err
			}
			return true, &shortNode{n.Key, nn, t.newFlag()}, nil
		}
		// Otherwise branch out at the index where they differ.
		branch := &fullNode{flags: t.newFlag()}
		var err error
		// Re-insert the existing child under the nibble where n.Key diverges...
		_, branch.Children[n.Key[matchlen]], err = t.insert(nil, append(prefix, n.Key[:matchlen+1]...), n.Key[matchlen+1:], n.Val)
		if err != nil {
			return false, nil, err
		}
		// ...and the new value under the nibble where key diverges.
		_, branch.Children[key[matchlen]], err = t.insert(nil, append(prefix, key[:matchlen+1]...), key[matchlen+1:], value)
		if err != nil {
			return false, nil, err
		}
		// Replace this shortNode with the branch if it occurs at index 0.
		if matchlen == 0 {
			return true, branch, nil
		}
		// New branch node is created as a child of the original short node.
		// Track the newly inserted node in the tracer. The node identifier
		// passed is the path from the root node.
		t.tracer.onInsert(append(prefix, key[:matchlen]...))

		// Replace it with a short node leading up to the branch.
		return true, &shortNode{key[:matchlen], branch, t.newFlag()}, nil

	case *fullNode:
		dirty, nn, err := t.insert(n.Children[key[0]], append(prefix, key[0]), key[1:], value)
		if !dirty || err != nil {
			return false, n, err
		}
		// Modify a copy so the original full node stays untouched.
		n = n.copy()
		n.flags = t.newFlag()
		n.Children[key[0]] = nn
		return true, n, nil

	case nil:
		// New short node is created and track it in the tracer. The node identifier
		// passed is the path from the root node. Note the valueNode won't be tracked
		// since it's always embedded in its parent.
		t.tracer.onInsert(prefix)

		return true, &shortNode{key, value, t.newFlag()}, nil

	case hashNode:
		// We've hit a part of the trie that isn't loaded yet. Load
		// the node and insert into it. This leaves all child nodes on
		// the path to the value in the trie.
		rn, err := t.resolveAndTrack(n, prefix)
		if err != nil {
			return false, nil, err
		}
		dirty, nn, err := t.insert(rn, prefix, key, value)
		if !dirty || err != nil {
			// Nothing changed (or the insert failed): hand back the
			// resolved node rather than the original hash node.
			return false, rn, err
		}
		return true, nn, nil

	default:
		panic(fmt.Sprintf("%T: invalid node: %v", n, n))
	}
}

// MustDelete is a wrapper of Delete and will omit any encountered error but
// just print out an error message.
func (t *Trie) MustDelete(key []byte) {
	if err := t.Delete(key); err != nil {
		log.Error("Unhandled trie error in Trie.Delete", "err", err)
	}
}

// Delete removes any existing value for key from the trie.
//
// If the requested node is not present in trie, no error will be returned.
// If the trie is corrupted, a MissingNodeError is returned.
func (t *Trie) Delete(key []byte) error {
	t.unhashed++
	k := keybytesToHex(key)
	_, n, err := t.delete(t.root, nil, k)
	if err != nil {
		return err
	}
	// Only replace the root once the deletion has succeeded.
	t.root = n
	return nil
}

// delete returns the new root of the trie with key deleted.
// It reduces the trie to minimal form by simplifying
// nodes on the way up after deleting recursively.
//
// prefix is the path leading from the root to n; it is what gets handed to
// the tracer and to the resolve helpers as the node identifier. The results
// are: whether the subtrie was modified, the node that takes n's place, and
// any error hit while resolving a hash node.
func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) {
	switch n := n.(type) {
	case *shortNode:
		matchlen := prefixLen(key, n.Key)
		if matchlen < len(n.Key) {
			return false, n, nil // don't replace n on mismatch
		}
		if matchlen == len(key) {
			// The matched short node is deleted entirely; track it in
			// the deletion set. Likewise, the valueNode doesn't need to
			// be tracked at all since it's always embedded.
			t.tracer.onDelete(prefix)

			return true, nil, nil // remove n entirely for whole matches
		}
		// The key is longer than n.Key. Remove the remaining suffix
		// from the subtrie. Child can never be nil here since the
		// subtrie must contain at least two other values with keys
		// longer than n.Key.
		dirty, child, err := t.delete(n.Val, append(prefix, key[:len(n.Key)]...), key[len(n.Key):])
		if !dirty || err != nil {
			return false, n, err
		}
		switch child := child.(type) {
		case *shortNode:
			// The child shortNode is merged into its parent, so track
			// it as deleted as well.
			t.tracer.onDelete(append(prefix, n.Key...))

			// Deleting from the subtrie reduced it to another
			// short node. Merge the nodes to avoid creating a
			// shortNode{..., shortNode{...}}. Use concat (which
			// always creates a new slice) instead of append to
			// avoid modifying n.Key since it might be shared with
			// other nodes.
			return true, &shortNode{concat(n.Key, child.Key...), child.Val, t.newFlag()}, nil
		default:
			return true, &shortNode{n.Key, child, t.newFlag()}, nil
		}

	case *fullNode:
		dirty, nn, err := t.delete(n.Children[key[0]], append(prefix, key[0]), key[1:])
		if !dirty || err != nil {
			return false, n, err
		}
		// Modify a copy so the original full node stays untouched.
		n = n.copy()
		n.flags = t.newFlag()
		n.Children[key[0]] = nn

		// Because n is a full node, it must've contained at least two children
		// before the delete operation. If the new child value is non-nil, n still
		// has at least two children after the deletion, and cannot be reduced to
		// a short node.
		if nn != nil {
			return true, n, nil
		}
		// Reduction:
		// Check how many non-nil entries are left after deleting and
		// reduce the full node to a short node if only one entry is
		// left. Since n must've contained at least two children
		// before deletion (otherwise it would not be a full node) n
		// can never be reduced to nil.
		//
		// When the loop is done, pos contains the index of the single
		// value that is left in n or -2 if n contains at least two
		// values.
		pos := -1
		for i, cld := range &n.Children {
			if cld != nil {
				if pos == -1 {
					pos = i
				} else {
					pos = -2
					break
				}
			}
		}
		if pos >= 0 {
			// Index 16 is excluded from the merge below; the entry there
			// is wrapped in a one-nibble short node without being resolved.
			if pos != 16 {
				// If the remaining entry is a short node, it replaces
				// n and its key gets the missing nibble tacked to the
				// front. This avoids creating an invalid
				// shortNode{..., shortNode{...}}. Since the entry
				// might not be loaded yet, resolve it just for this
				// check.
				cnode, err := t.resolve(n.Children[pos], append(prefix, byte(pos)))
				if err != nil {
					return false, nil, err
				}
				if cnode, ok := cnode.(*shortNode); ok {
					// Replace the entire full node with the short node.
					// Mark the original short node as deleted since the
					// value is embedded into the parent now.
					t.tracer.onDelete(append(prefix, byte(pos)))

					k := append([]byte{byte(pos)}, cnode.Key...)
					return true, &shortNode{k, cnode.Val, t.newFlag()}, nil
				}
			}
			// Otherwise, n is replaced by a one-nibble short node
			// containing the child.
			return true, &shortNode{[]byte{byte(pos)}, n.Children[pos], t.newFlag()}, nil
		}
		// n still contains at least two values and cannot be reduced.
		return true, n, nil

	case valueNode:
		// A value sits at this position: drop it and report the change.
		return true, nil, nil

	case nil:
		// Nothing is stored at this position, so nothing changes.
		return false, nil, nil

	case hashNode:
		// We've hit a part of the trie that isn't loaded yet. Load
		// the node and delete from it. This leaves all child nodes on
		// the path to the value in the trie.
		rn, err := t.resolveAndTrack(n, prefix)
		if err != nil {
			return false, nil, err
		}
		dirty, nn, err := t.delete(rn, prefix, key)
		if !dirty || err != nil {
			// Nothing changed (or the delete failed): hand back the
			// resolved node rather than the original hash node.
			return false, rn, err
		}
		return true, nn, nil

	default:
		panic(fmt.Sprintf("%T: invalid node: %v (%v)", n, n, key))
	}
}

// concat returns a freshly allocated slice holding the bytes of s1 followed
// by the bytes of s2. Neither input is modified.
func concat(s1 []byte, s2 ...byte) []byte {
	r := make([]byte, len(s1)+len(s2))
	copy(r, s1)
	copy(r[len(s1):], s2)
	return r
}

// resolve loads n through resolveAndTrack when it is a hashNode, using
// prefix as the node path. Any other kind of node is returned unchanged.
func (t *Trie) resolve(n node, prefix []byte) (node, error) {
	if n, ok := n.(hashNode); ok {
		return t.resolveAndTrack(n, prefix)
	}
	return n, nil
}

// resolveAndTrack loads node from the underlying store with the given node hash
// and path prefix and also tracks the loaded node blob in tracer treated as the
// node's original value. The rlp-encoded blob is preferred to be loaded from
// database because it's easy to decode node while complex to encode node to blob.
552 func (t *Trie) resolveAndTrack(n hashNode, prefix []byte) (node, error) { 553 blob, err := t.reader.node(prefix, common.BytesToHash(n)) 554 if err != nil { 555 return nil, err 556 } 557 t.tracer.onRead(prefix, blob) 558 return mustDecodeNode(n, blob), nil 559 } 560 561 // Hash returns the root hash of the trie. It does not write to the 562 // database and can be used even if the trie doesn't have one. 563 func (t *Trie) Hash() common.Hash { 564 hash, cached := t.hashRoot() 565 t.root = cached 566 return common.BytesToHash(hash.(hashNode)) 567 } 568 569 // Commit collects all dirty nodes in the trie and replaces them with the 570 // corresponding node hash. All collected nodes (including dirty leaves if 571 // collectLeaf is true) will be encapsulated into a nodeset for return. 572 // The returned nodeset can be nil if the trie is clean (nothing to commit). 573 // Once the trie is committed, it's not usable anymore. A new trie must 574 // be created with new root and updated trie database for following usage 575 func (t *Trie) Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet) { 576 defer t.tracer.reset() 577 578 nodes := trienode.NewNodeSet(t.owner) 579 t.tracer.markDeletions(nodes) 580 581 // Trie is empty and can be classified into two types of situations: 582 // - The trie was empty and no update happens 583 // - The trie was non-empty and all nodes are dropped 584 if t.root == nil { 585 return types.EmptyRootHash, nodes 586 } 587 // Derive the hash for all dirty nodes first. We hold the assumption 588 // in the following procedure that all nodes are hashed. 589 rootHash := t.Hash() 590 591 // Do a quick check if we really need to commit. This can happen e.g. 592 // if we load a trie for reading storage values, but don't write to it. 593 if hashedNode, dirty := t.root.cache(); !dirty { 594 // Replace the root node with the origin hash in order to 595 // ensure all resolved nodes are dropped after the commit. 
596 t.root = hashedNode 597 return rootHash, nil 598 } 599 t.root = newCommitter(nodes, t.tracer, collectLeaf).Commit(t.root) 600 return rootHash, nodes 601 } 602 603 // hashRoot calculates the root hash of the given trie 604 func (t *Trie) hashRoot() (node, node) { 605 if t.root == nil { 606 return hashNode(types.EmptyRootHash.Bytes()), nil 607 } 608 // If the number of changes is below 100, we let one thread handle it 609 h := newHasher(t.unhashed >= 100) 610 defer func() { 611 returnHasherToPool(h) 612 t.unhashed = 0 613 }() 614 hashed, cached := h.hash(t.root, true) 615 return hashed, cached 616 } 617 618 // Reset drops the referenced root node and cleans all internal state. 619 func (t *Trie) Reset() { 620 t.root = nil 621 t.owner = common.Hash{} 622 t.unhashed = 0 623 t.tracer.reset() 624 }