github.com/klaytn/klaytn@v1.12.1/storage/statedb/sync.go (about) 1 // Modifications Copyright 2018 The klaytn Authors 2 // Copyright 2015 The go-ethereum Authors 3 // This file is part of the go-ethereum library. 4 // 5 // The go-ethereum library is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Lesser General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // The go-ethereum library is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Lesser General Public License for more details. 14 // 15 // You should have received a copy of the GNU Lesser General Public License 16 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 17 // 18 // This file is derived from trie/sync.go (2018/06/04). 19 // Modified and improved for the klaytn development. 20 21 package statedb 22 23 import ( 24 "errors" 25 "fmt" 26 "strconv" 27 28 lru "github.com/hashicorp/golang-lru" 29 "github.com/klaytn/klaytn/common" 30 "github.com/klaytn/klaytn/common/prque" 31 "github.com/klaytn/klaytn/storage/database" 32 ) 33 34 // ErrNotRequested is returned by the trie sync when it's requested to process a 35 // node it did not request. 36 var ErrNotRequested = errors.New("not requested") 37 38 // ErrAlreadyProcessed is returned by the trie sync when it's requested to process a 39 // node it already processed previously. 40 var ErrAlreadyProcessed = errors.New("already processed") 41 42 // maxFetchesPerDepth is the maximum number of pending trie nodes per depth. The 43 // role of this value is to limit the number of trie nodes that get expanded in 44 // memory if the node was configured with a significant number of peers. 45 const maxFetchesPerDepth = 16384 46 47 // request represents a scheduled or already in-flight state retrieval request. 48 type request struct { 49 path []byte // Merkle path leading to this node for prioritization 50 hash common.Hash // Hash of the node data content to retrieve 51 data []byte // Data content of the node, cached until all subtrees complete 52 code bool // Whether this is a code entry 53 54 parents []*request // Parent state nodes referencing this entry (notify all upon completion) 55 depth int // Depth level within the trie the node is located to prioritise DFS 56 deps int // Number of dependencies before allowed to commit this node 57 58 callback LeafCallback // Callback to invoke if a leaf node it reached on this branch 59 } 60 61 // SyncPath is a path tuple identifying a particular trie node either in a single 62 // trie (account) or a layered trie (account -> storage). 63 // 64 // Content wise the tuple either has 1 element if it addresses a node in a single 65 // trie or 2 elements if it addresses a node in a stacked trie. 66 // 67 // To support aiming arbitrary trie nodes, the path needs to support odd nibble 68 // lengths. To avoid transferring expanded hex form over the network, the last 69 // part of the tuple (which needs to index into the middle of a trie) is compact 70 // encoded. In case of a 2-tuple, the first item is always 32 bytes so that is 71 // simple binary encoded. 72 // 73 // Examples: 74 // - Path 0x9 -> {0x19} 75 // - Path 0x99 -> {0x0099} 76 // - Path 0x01234567890123456789012345678901012345678901234567890123456789019 -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x19} 77 // - Path 0x012345678901234567890123456789010123456789012345678901234567890199 -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x0099} 78 type SyncPath [][]byte 79 80 // newSyncPath converts an expanded trie path from nibble form into a compact 81 // version that can be sent over the network. 82 func newSyncPath(path []byte) SyncPath { 83 // If the hash is from the account trie, append a single item, if it 84 // is from the a storage trie, append a tuple. Note, the length 64 is 85 // clashing between account leaf and storage root. It's fine though 86 // because having a trie node at 64 depth means a hash collision was 87 // found and we're long dead. 88 if len(path) < 64 { 89 return SyncPath{hexToCompact(path)} 90 } 91 return SyncPath{hexToKeybytes(path[:64]), hexToCompact(path[64:])} 92 } 93 94 // SyncResult is a response with requested data along with it's hash. 95 type SyncResult struct { 96 Hash common.Hash // Hash of the originally unknown trie node 97 Data []byte // Data content of the retrieved node 98 Err error 99 } 100 101 // syncMemBatch is an in-memory buffer of successfully downloaded but not yet 102 // persisted data items. 103 type syncMemBatch struct { 104 nodes map[common.Hash][]byte // In-memory membatch of recently completed nodes 105 codes map[common.Hash][]byte // In-memory membatch of recently completed codes 106 } 107 108 // newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes. 109 func newSyncMemBatch() *syncMemBatch { 110 return &syncMemBatch{ 111 nodes: make(map[common.Hash][]byte), 112 codes: make(map[common.Hash][]byte), 113 } 114 } 115 116 // hasNode reports the trie node with specific hash is already cached. 117 func (batch *syncMemBatch) hasNode(hash common.Hash) bool { 118 _, ok := batch.nodes[hash] 119 return ok 120 } 121 122 // hasCode reports the contract code with specific hash is already cached. 123 func (batch *syncMemBatch) hasCode(hash common.Hash) bool { 124 _, ok := batch.codes[hash] 125 return ok 126 } 127 128 type StateTrieReadDB interface { 129 ReadTrieNode(hash common.ExtHash) ([]byte, error) 130 HasTrieNode(hash common.ExtHash) (bool, error) 131 HasCodeWithPrefix(hash common.Hash) bool 132 } 133 134 // TrieSync is the main state trie synchronisation scheduler, which provides yet 135 // unknown trie hashes to retrieve, accepts node data associated with said hashes 136 // and reconstructs the trie step by step until all is done. 137 type TrieSync struct { 138 database StateTrieReadDB // Persistent database to check for existing entries 139 membatch *syncMemBatch // Memory buffer to avoid frequent database writes 140 nodeReqs map[common.Hash]*request // Pending requests pertaining to a trie node hash 141 codeReqs map[common.Hash]*request // Pending requests pertaining to a code hash 142 queue *prque.Prque // Priority queue with the pending requests 143 fetches map[int]int // Number of active fetches per trie node depth 144 retrievedByDepth map[int]int // Retrieved trie node number counted by depth 145 committedByDepth map[int]int // Committed trie nodes number counted by depth 146 bloom *SyncBloom // Bloom filter for fast state existence checks 147 exist *lru.Cache // exist to check if the trie node is already written or not 148 } 149 150 // NewTrieSync creates a new trie data download scheduler. 151 // If both bloom and cache are set, only cache is used. 152 func NewTrieSync(root common.Hash, database StateTrieReadDB, callback LeafCallback, bloom *SyncBloom, lruCache *lru.Cache) *TrieSync { 153 ts := &TrieSync{ 154 database: database, 155 membatch: newSyncMemBatch(), 156 nodeReqs: make(map[common.Hash]*request), 157 codeReqs: make(map[common.Hash]*request), 158 queue: prque.New(), 159 fetches: make(map[int]int), 160 retrievedByDepth: make(map[int]int), 161 committedByDepth: make(map[int]int), 162 bloom: bloom, 163 exist: lruCache, 164 } 165 ts.AddSubTrie(root, nil, 0, common.Hash{}, callback) 166 return ts 167 } 168 169 // AddSubTrie registers a new trie to the sync code, rooted at the designated parent. 170 func (s *TrieSync) AddSubTrie(root common.Hash, path []byte, depth int, parent common.Hash, callback LeafCallback) { 171 // Short circuit if the trie is empty or already known 172 if root == emptyRoot { 173 return 174 } 175 if s.membatch.hasNode(root) { 176 return 177 } 178 if s.exist != nil { 179 if _, ok := s.exist.Get(root); ok { 180 // already written in migration, skip the node 181 return 182 } 183 } else if s.bloom == nil || s.bloom.Contains(root[:]) { 184 // Bloom filter says this might be a duplicate, double check. 185 // If database says yes, then at least the trie node is present 186 // and we hold the assumption that it's NOT legacy contract code. 187 if ok, _ := s.database.HasTrieNode(root.ExtendZero()); ok { 188 logger.Debug("skip write sub-trie", "root", root.String()) 189 return 190 } 191 // False positive, bump fault meter 192 bloomFaultMeter.Mark(1) 193 } 194 // Assemble the new sub-trie sync request 195 req := &request{ 196 path: path, 197 hash: root, 198 depth: depth, 199 callback: callback, 200 } 201 // If this sub-trie has a designated parent, link them together 202 if parent != (common.Hash{}) { 203 ancestor := s.nodeReqs[parent] 204 if ancestor == nil { 205 panic(fmt.Sprintf("sub-trie ancestor not found: %x", parent)) 206 } 207 ancestor.deps++ 208 req.parents = append(req.parents, ancestor) 209 } 210 s.schedule(req) 211 } 212 213 // AddCodeEntry schedules the direct retrieval of a contract code that should not 214 // be interpreted as a trie node, but rather accepted and stored into the database 215 // as is. 216 func (s *TrieSync) AddCodeEntry(hash common.Hash, path []byte, depth int, parent common.Hash) { 217 // Short circuit if the entry is empty or already known 218 if hash == emptyState { 219 return 220 } 221 if s.membatch.hasCode(hash) { 222 return 223 } 224 if s.exist != nil { 225 if _, ok := s.exist.Get(hash); ok { 226 // already written in migration, skip the node 227 return 228 } 229 } else if s.bloom == nil || s.bloom.Contains(hash[:]) { 230 // Bloom filter says this might be a duplicate, double check. 231 // If database says yes, the blob is present for sure. 232 // Note we only check the existence with new code scheme, fast 233 // sync is expected to run with a fresh new node. Even there 234 // exists the code with legacy format, fetch and store with 235 // new scheme anyway. 236 if ok := s.database.HasCodeWithPrefix(hash); ok { 237 logger.Debug("skip write code entry", "root", hash.String()) 238 return 239 } 240 // False positive, bump fault meter 241 bloomFaultMeter.Mark(1) 242 } 243 // Assemble the new sub-trie sync request 244 req := &request{ 245 path: path, 246 hash: hash, 247 code: true, 248 depth: depth, 249 } 250 // If this sub-trie has a designated parent, link them together 251 if parent != (common.Hash{}) { 252 ancestor := s.nodeReqs[parent] // the parent of codereq can ONLY be nodereq 253 if ancestor == nil { 254 panic(fmt.Sprintf("raw-entry ancestor not found: %x", parent)) 255 } 256 ancestor.deps++ 257 req.parents = append(req.parents, ancestor) 258 } 259 s.schedule(req) 260 } 261 262 // Missing retrieves the known missing nodes from the trie for retrieval. To aid 263 // both klay/6x style fast sync and snap/1x style state sync, the paths of trie 264 // nodes are returned too, as well as separate hash list for codes. 265 func (s *TrieSync) Missing(max int) (nodes []common.Hash, paths []SyncPath, codes []common.Hash) { 266 var ( 267 nodeHashes []common.Hash 268 nodePaths []SyncPath 269 codeHashes []common.Hash 270 ) 271 for !s.queue.Empty() && (max == 0 || len(nodeHashes)+len(codeHashes) < max) { 272 // Retrieve th enext item in line 273 item, prio := s.queue.Peek() 274 275 // If we have too many already-pending tasks for this depth, throttle 276 depth := int(prio >> 56) 277 if s.fetches[depth] > maxFetchesPerDepth { 278 break 279 } 280 // Item is allowed to be scheduled, add it to the task list 281 s.queue.Pop() 282 s.fetches[depth]++ 283 284 hash := item.(common.Hash) 285 if req, ok := s.nodeReqs[hash]; ok { 286 nodeHashes = append(nodeHashes, hash) 287 nodePaths = append(nodePaths, newSyncPath(req.path)) 288 } else { 289 codeHashes = append(codeHashes, hash) 290 } 291 } 292 return nodeHashes, nodePaths, codeHashes 293 } 294 295 // Process injects the received data for requested item. Note it can 296 // happen that the single response commits two pending requests(e.g. 297 // there are two requests one for code and one for node but the hash 298 // is same). In this case the second response for the same hash will 299 // be treated as "non-requested" item or "already-processed" item but 300 // there is no downside. 301 func (s *TrieSync) Process(result SyncResult) error { 302 // If the item was not requested either for code or node, bail out 303 if s.nodeReqs[result.Hash] == nil && s.codeReqs[result.Hash] == nil { 304 return ErrNotRequested 305 } 306 // There is an pending code request for this data, commit directly 307 var filled bool 308 if req := s.codeReqs[result.Hash]; req != nil && req.data == nil { 309 filled = true 310 req.data = result.Data 311 s.commit(req) 312 } 313 // There is an pending node request for this data, fill it. 314 if req := s.nodeReqs[result.Hash]; req != nil && req.data == nil { 315 filled = true 316 // Decode the node data content and update the request 317 node, err := decodeNode(result.Hash[:], result.Data) 318 if err != nil { 319 return err 320 } 321 req.data = result.Data 322 323 // Create and schedule a request for all the children nodes 324 requests, err := s.children(req, node) 325 if err != nil { 326 return err 327 } 328 if len(requests) == 0 && req.deps == 0 { 329 s.commit(req) 330 } else { 331 req.deps += len(requests) 332 for _, child := range requests { 333 s.schedule(child) 334 } 335 } 336 } 337 if !filled { 338 return ErrAlreadyProcessed 339 } 340 return nil 341 } 342 343 // Commit flushes the data stored in the internal membatch out to persistent 344 // storage, returning the number of items written and any occurred error. 345 func (s *TrieSync) Commit(dbw database.Batch) (int, error) { 346 written := 0 347 // Dump the membatch into a database dbw 348 for key, value := range s.membatch.nodes { 349 if err := dbw.Put(database.TrieNodeKey(key.ExtendZero()), value); err != nil { // only works with hash32 350 return written, err 351 } 352 if s.bloom != nil { 353 s.bloom.Add(key[:]) 354 } 355 if s.exist != nil { 356 s.exist.Add(key, nil) 357 } 358 written += 1 359 } 360 for key, value := range s.membatch.codes { 361 if err := dbw.Put(database.CodeKey(key), value); err != nil { 362 return written, err 363 } 364 if s.bloom != nil { 365 s.bloom.Add(key[:]) 366 } 367 if s.exist != nil { 368 s.exist.Add(key, nil) 369 } 370 written += 1 371 } 372 373 // Drop the membatch data and return 374 s.membatch = newSyncMemBatch() 375 return written, nil 376 } 377 378 // Pending returns the number of state entries currently pending for download. 379 func (s *TrieSync) Pending() int { 380 return len(s.nodeReqs) + len(s.codeReqs) 381 } 382 383 // schedule inserts a new state retrieval request into the fetch queue. If there 384 // is already a pending request for this node, the new request will be discarded 385 // and only a parent reference added to the old one. 386 func (s *TrieSync) schedule(req *request) { 387 reqset := s.nodeReqs 388 if req.code { 389 reqset = s.codeReqs 390 } 391 // If we're already requesting this node, add a new reference and stop 392 if old, ok := reqset[req.hash]; ok { 393 old.parents = append(old.parents, req.parents...) 394 return 395 } 396 397 // Count the retrieved trie by depth 398 s.retrievedByDepth[req.depth]++ 399 400 reqset[req.hash] = req 401 402 // Schedule the request for future retrieval. This queue is shared 403 // by both node requests and code requests. It can happen that there 404 // is a trie node and code has same hash. In this case two elements 405 // with same hash and same or different depth will be pushed. But it's 406 // ok the worst case is the second response will be treated as duplicated. 407 prio := int64(len(req.path)) << 56 // depth >= 128 will never happen, storage leaves will be included in their parents 408 for i := 0; i < 14 && i < len(req.path); i++ { 409 prio |= int64(15-req.path[i]) << (52 - i*4) // 15-nibble => lexicographic order 410 } 411 s.queue.Push(req.hash, prio) 412 } 413 414 // children retrieves all the missing children of a state trie entry for future 415 // retrieval scheduling. 416 func (s *TrieSync) children(req *request, object node) ([]*request, error) { 417 // Gather all the children of the node, irrelevant whether known or not 418 type child struct { 419 path []byte 420 node node 421 depth int 422 } 423 children := []child{} 424 425 switch node := (object).(type) { 426 case *shortNode: 427 key := node.Key 428 if hasTerm(key) { 429 key = key[:len(key)-1] 430 } 431 children = []child{{ 432 node: node.Val, 433 path: append(append([]byte(nil), req.path...), key...), 434 depth: req.depth + len(node.Key), 435 }} 436 case *fullNode: 437 for i := 0; i < 17; i++ { 438 if node.Children[i] != nil { 439 children = append(children, child{ 440 node: node.Children[i], 441 path: append(append([]byte(nil), req.path...), byte(i)), 442 depth: req.depth + 1, 443 }) 444 } 445 } 446 default: 447 panic(fmt.Sprintf("unknown node: %+v", node)) 448 } 449 // Iterate over the children, and request all unknown ones 450 requests := make([]*request, 0, len(children)) 451 for _, child := range children { 452 // Notify any external watcher of a new key/value node 453 if req.callback != nil { 454 if node, ok := (child.node).(valueNode); ok { 455 var paths [][]byte 456 if len(child.path) == 2*common.HashLength { 457 paths = append(paths, hexToKeybytes(child.path)) 458 } else if len(child.path) == 4*common.HashLength { 459 paths = append(paths, hexToKeybytes(child.path[:2*common.HashLength])) 460 paths = append(paths, hexToKeybytes(child.path[2*common.HashLength:])) 461 } 462 if err := req.callback(paths, child.path, node, req.hash.ExtendZero(), child.depth); err != nil { 463 return nil, err 464 } 465 } 466 } 467 // If the child references another node, resolve or schedule 468 if node, ok := (child.node).(hashNode); ok { 469 // Try to resolve the node from the local database 470 hash := common.BytesToExtHash(node).Unextend() 471 if s.membatch.hasNode(hash) { 472 continue 473 } 474 if s.exist != nil { 475 if _, ok := s.exist.Get(hash); ok { 476 // already written in migration, skip the node 477 continue 478 } 479 } else if s.bloom == nil || s.bloom.Contains(node) { 480 // Bloom filter says this might be a duplicate, double check. 481 // If database says yes, then at least the trie node is present 482 // and we hold the assumption that it's NOT legacy contract code. 483 if ok, _ := s.database.HasTrieNode(hash.ExtendZero()); ok { 484 continue 485 } 486 // False positive, bump fault meter 487 bloomFaultMeter.Mark(1) 488 } 489 490 // Locally unknown node, schedule for retrieval 491 requests = append(requests, &request{ 492 path: child.path, 493 hash: hash, 494 parents: []*request{req}, 495 depth: child.depth, 496 callback: req.callback, 497 }) 498 } 499 } 500 return requests, nil 501 } 502 503 // commit finalizes a retrieval request and stores it into the membatch. If any 504 // of the referencing parent requests complete due to this commit, they are also 505 // committed themselves. 506 func (s *TrieSync) commit(req *request) (err error) { 507 // Count the committed trie by depth and Clear the counts of lower depth 508 s.committedByDepth[req.depth]++ 509 510 // Write the node content to the membatch 511 if req.code { 512 s.membatch.codes[req.hash] = req.data 513 delete(s.codeReqs, req.hash) 514 s.fetches[len(req.path)]-- 515 } else { 516 s.membatch.nodes[req.hash] = req.data 517 delete(s.nodeReqs, req.hash) 518 s.fetches[len(req.path)]-- 519 } 520 // Check all parents for completion 521 for _, parent := range req.parents { 522 parent.deps-- 523 if parent.deps == 0 { 524 if err := s.commit(parent); err != nil { 525 return err 526 } 527 } 528 } 529 return nil 530 } 531 532 // RetrievedByDepth returns the retrieved trie count by given depth. 533 // This number is same as the number of nodes that needs to be committed to complete trie sync. 534 func (s *TrieSync) RetrievedByDepth(depth int) int { 535 return s.retrievedByDepth[depth] 536 } 537 538 // CommittedByDepth returns the committed trie count by given depth. 539 func (s *TrieSync) CommittedByDepth(depth int) int { 540 return s.committedByDepth[depth] 541 } 542 543 // CalcProgressPercentage returns the progress percentage. 544 func (s *TrieSync) CalcProgressPercentage() float64 { 545 var progress float64 546 // depth max trie resolution (%) 547 // 0 1 100.00000 548 // 1 16 6.25000 549 // 2 256 0.39063 550 // 3 4,096 0.02441 551 // 4 65,536 0.00153 552 // 5 1,048,576 0.00010 553 554 for i := 0; i < 20; i++ { 555 c, r := s.CommittedByDepth(i), s.RetrievedByDepth(i) 556 557 var progressByDepth float64 558 559 if r == 0 { 560 break 561 } 562 563 if r > 0 { 564 progressByDepth = float64(c) / float64(r) * 100 565 if progressByDepth > progress && i < 4 { // Scan depth 0 ~ 3 for accuracy 566 progress = progressByDepth 567 } 568 } 569 570 logger.Debug("Trie sync progress by depth #"+strconv.Itoa(i), "committed", c, "retrieved", r, "progress", progressByDepth) 571 } 572 573 logger.Debug("Trie sync progress ", "progress", strconv.FormatFloat(progress, 'f', -1, 64)+"%") 574 575 return progress 576 }