// Copyright 2015 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package trie

import (
	"errors"
	"fmt"

	"github.com/ethw3/go-ethereuma/common"
	"github.com/ethw3/go-ethereuma/common/prque"
	"github.com/ethw3/go-ethereuma/core/rawdb"
	"github.com/ethw3/go-ethereuma/ethdb"
	"github.com/ethw3/go-ethereuma/log"
)

// ErrNotRequested is returned by the trie sync when it's requested to process a
// node it did not request.
var ErrNotRequested = errors.New("not requested")

// ErrAlreadyProcessed is returned by the trie sync when it's requested to process a
// node it already processed previously.
var ErrAlreadyProcessed = errors.New("already processed")

// maxFetchesPerDepth is the maximum number of pending trie nodes per depth. The
// role of this value is to limit the number of trie nodes that get expanded in
// memory if the node was configured with a significant number of peers.
//
// Missing stops handing out further requests as soon as the number of active
// fetches at the depth of the next queued item exceeds this value.
const maxFetchesPerDepth = 16384

// SyncPath is a path tuple identifying a particular trie node either in a single
// trie (account) or a layered trie (account -> storage).
45 // 46 // Content wise the tuple either has 1 element if it addresses a node in a single 47 // trie or 2 elements if it addresses a node in a stacked trie. 48 // 49 // To support aiming arbitrary trie nodes, the path needs to support odd nibble 50 // lengths. To avoid transferring expanded hex form over the network, the last 51 // part of the tuple (which needs to index into the middle of a trie) is compact 52 // encoded. In case of a 2-tuple, the first item is always 32 bytes so that is 53 // simple binary encoded. 54 // 55 // Examples: 56 // - Path 0x9 -> {0x19} 57 // - Path 0x99 -> {0x0099} 58 // - Path 0x01234567890123456789012345678901012345678901234567890123456789019 -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x19} 59 // - Path 0x012345678901234567890123456789010123456789012345678901234567890199 -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x0099} 60 type SyncPath [][]byte 61 62 // NewSyncPath converts an expanded trie path from nibble form into a compact 63 // version that can be sent over the network. 64 func NewSyncPath(path []byte) SyncPath { 65 // If the hash is from the account trie, append a single item, if it 66 // is from the a storage trie, append a tuple. Note, the length 64 is 67 // clashing between account leaf and storage root. It's fine though 68 // because having a trie node at 64 depth means a hash collision was 69 // found and we're long dead. 70 if len(path) < 64 { 71 return SyncPath{hexToCompact(path)} 72 } 73 return SyncPath{hexToKeybytes(path[:64]), hexToCompact(path[64:])} 74 } 75 76 // nodeRequest represents a scheduled or already in-flight trie node retrieval request. 
type nodeRequest struct {
	hash common.Hash // Hash of the trie node to retrieve
	path []byte      // Merkle path leading to this node for prioritization; also the key of the request in Sync.nodeReqs
	data []byte      // Data content of the node, cached until all subtrees complete

	parent   *nodeRequest // Parent state node referencing this entry
	deps     int          // Number of dependencies before allowed to commit this node
	callback LeafCallback // Callback to invoke if a leaf node is reached on this branch
}

// codeRequest represents a scheduled or already in-flight bytecode retrieval request.
// Requests are keyed by code hash, so a single request may be shared by several
// parent trie nodes.
type codeRequest struct {
	hash    common.Hash    // Hash of the contract bytecode to retrieve
	path    []byte         // Merkle path leading to this node for prioritization
	data    []byte         // Data content of the bytecode, filled in by ProcessCode before the request is committed
	parents []*nodeRequest // Parent state nodes referencing this entry (notify all upon completion)
}

// NodeSyncResult is a response with requested trie node along with its node path.
// The path is the string form under which the request is tracked by the scheduler.
type NodeSyncResult struct {
	Path string // Path of the originally unknown trie node
	Data []byte // Data content of the retrieved trie node
}

// CodeSyncResult is a response with requested bytecode along with its hash.
type CodeSyncResult struct {
	Hash common.Hash // Hash of the originally unknown bytecode
	Data []byte      // Data content of the retrieved bytecode
}

// syncMemBatch is an in-memory buffer of successfully downloaded but not yet
// persisted data items.
type syncMemBatch struct {
	nodes  map[string][]byte      // In-memory membatch of recently completed nodes, keyed by node path
	hashes map[string]common.Hash // Hashes of recently completed nodes, keyed by node path
	codes  map[common.Hash][]byte // In-memory membatch of recently completed codes, keyed by code hash
}

// newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes.
116 func newSyncMemBatch() *syncMemBatch { 117 return &syncMemBatch{ 118 nodes: make(map[string][]byte), 119 hashes: make(map[string]common.Hash), 120 codes: make(map[common.Hash][]byte), 121 } 122 } 123 124 // hasNode reports the trie node with specific path is already cached. 125 func (batch *syncMemBatch) hasNode(path []byte) bool { 126 _, ok := batch.nodes[string(path)] 127 return ok 128 } 129 130 // hasCode reports the contract code with specific hash is already cached. 131 func (batch *syncMemBatch) hasCode(hash common.Hash) bool { 132 _, ok := batch.codes[hash] 133 return ok 134 } 135 136 // Sync is the main state trie synchronisation scheduler, which provides yet 137 // unknown trie hashes to retrieve, accepts node data associated with said hashes 138 // and reconstructs the trie step by step until all is done. 139 type Sync struct { 140 database ethdb.KeyValueReader // Persistent database to check for existing entries 141 membatch *syncMemBatch // Memory buffer to avoid frequent database writes 142 nodeReqs map[string]*nodeRequest // Pending requests pertaining to a trie node path 143 codeReqs map[common.Hash]*codeRequest // Pending requests pertaining to a code hash 144 queue *prque.Prque // Priority queue with the pending requests 145 fetches map[int]int // Number of active fetches per trie node depth 146 } 147 148 // NewSync creates a new trie data download scheduler. 149 func NewSync(root common.Hash, database ethdb.KeyValueReader, callback LeafCallback) *Sync { 150 ts := &Sync{ 151 database: database, 152 membatch: newSyncMemBatch(), 153 nodeReqs: make(map[string]*nodeRequest), 154 codeReqs: make(map[common.Hash]*codeRequest), 155 queue: prque.New(nil), 156 fetches: make(map[int]int), 157 } 158 ts.AddSubTrie(root, nil, common.Hash{}, nil, callback) 159 return ts 160 } 161 162 // AddSubTrie registers a new trie to the sync code, rooted at the designated 163 // parent for completion tracking. 
The given path is a unique node path in 164 // hex format and contain all the parent path if it's layered trie node. 165 func (s *Sync) AddSubTrie(root common.Hash, path []byte, parent common.Hash, parentPath []byte, callback LeafCallback) { 166 // Short circuit if the trie is empty or already known 167 if root == emptyRoot { 168 return 169 } 170 if s.membatch.hasNode(path) { 171 return 172 } 173 if rawdb.HasTrieNode(s.database, root) { 174 return 175 } 176 // Assemble the new sub-trie sync request 177 req := &nodeRequest{ 178 hash: root, 179 path: path, 180 callback: callback, 181 } 182 // If this sub-trie has a designated parent, link them together 183 if parent != (common.Hash{}) { 184 ancestor := s.nodeReqs[string(parentPath)] 185 if ancestor == nil { 186 panic(fmt.Sprintf("sub-trie ancestor not found: %x", parent)) 187 } 188 ancestor.deps++ 189 req.parent = ancestor 190 } 191 s.scheduleNodeRequest(req) 192 } 193 194 // AddCodeEntry schedules the direct retrieval of a contract code that should not 195 // be interpreted as a trie node, but rather accepted and stored into the database 196 // as is. 197 func (s *Sync) AddCodeEntry(hash common.Hash, path []byte, parent common.Hash, parentPath []byte) { 198 // Short circuit if the entry is empty or already known 199 if hash == emptyState { 200 return 201 } 202 if s.membatch.hasCode(hash) { 203 return 204 } 205 // If database says duplicate, the blob is present for sure. 206 // Note we only check the existence with new code scheme, fast 207 // sync is expected to run with a fresh new node. Even there 208 // exists the code with legacy format, fetch and store with 209 // new scheme anyway. 
210 if rawdb.HasCodeWithPrefix(s.database, hash) { 211 return 212 } 213 // Assemble the new sub-trie sync request 214 req := &codeRequest{ 215 path: path, 216 hash: hash, 217 } 218 // If this sub-trie has a designated parent, link them together 219 if parent != (common.Hash{}) { 220 ancestor := s.nodeReqs[string(parentPath)] // the parent of codereq can ONLY be nodereq 221 if ancestor == nil { 222 panic(fmt.Sprintf("raw-entry ancestor not found: %x", parent)) 223 } 224 ancestor.deps++ 225 req.parents = append(req.parents, ancestor) 226 } 227 s.scheduleCodeRequest(req) 228 } 229 230 // Missing retrieves the known missing nodes from the trie for retrieval. To aid 231 // both eth/6x style fast sync and snap/1x style state sync, the paths of trie 232 // nodes are returned too, as well as separate hash list for codes. 233 func (s *Sync) Missing(max int) ([]string, []common.Hash, []common.Hash) { 234 var ( 235 nodePaths []string 236 nodeHashes []common.Hash 237 codeHashes []common.Hash 238 ) 239 for !s.queue.Empty() && (max == 0 || len(nodeHashes)+len(codeHashes) < max) { 240 // Retrieve the next item in line 241 item, prio := s.queue.Peek() 242 243 // If we have too many already-pending tasks for this depth, throttle 244 depth := int(prio >> 56) 245 if s.fetches[depth] > maxFetchesPerDepth { 246 break 247 } 248 // Item is allowed to be scheduled, add it to the task list 249 s.queue.Pop() 250 s.fetches[depth]++ 251 252 switch item := item.(type) { 253 case common.Hash: 254 codeHashes = append(codeHashes, item) 255 case string: 256 req, ok := s.nodeReqs[item] 257 if !ok { 258 log.Error("Missing node request", "path", item) 259 continue // System very wrong, shouldn't happen 260 } 261 nodePaths = append(nodePaths, item) 262 nodeHashes = append(nodeHashes, req.hash) 263 } 264 } 265 return nodePaths, nodeHashes, codeHashes 266 } 267 268 // ProcessCode injects the received data for requested item. 
Note it can 269 // happpen that the single response commits two pending requests(e.g. 270 // there are two requests one for code and one for node but the hash 271 // is same). In this case the second response for the same hash will 272 // be treated as "non-requested" item or "already-processed" item but 273 // there is no downside. 274 func (s *Sync) ProcessCode(result CodeSyncResult) error { 275 // If the code was not requested or it's already processed, bail out 276 req := s.codeReqs[result.Hash] 277 if req == nil { 278 return ErrNotRequested 279 } 280 if req.data != nil { 281 return ErrAlreadyProcessed 282 } 283 req.data = result.Data 284 return s.commitCodeRequest(req) 285 } 286 287 // ProcessNode injects the received data for requested item. Note it can 288 // happen that the single response commits two pending requests(e.g. 289 // there are two requests one for code and one for node but the hash 290 // is same). In this case the second response for the same hash will 291 // be treated as "non-requested" item or "already-processed" item but 292 // there is no downside. 
293 func (s *Sync) ProcessNode(result NodeSyncResult) error { 294 // If the trie node was not requested or it's already processed, bail out 295 req := s.nodeReqs[result.Path] 296 if req == nil { 297 return ErrNotRequested 298 } 299 if req.data != nil { 300 return ErrAlreadyProcessed 301 } 302 // Decode the node data content and update the request 303 node, err := decodeNode(req.hash.Bytes(), result.Data) 304 if err != nil { 305 return err 306 } 307 req.data = result.Data 308 309 // Create and schedule a request for all the children nodes 310 requests, err := s.children(req, node) 311 if err != nil { 312 return err 313 } 314 if len(requests) == 0 && req.deps == 0 { 315 s.commitNodeRequest(req) 316 } else { 317 req.deps += len(requests) 318 for _, child := range requests { 319 s.scheduleNodeRequest(child) 320 } 321 } 322 return nil 323 } 324 325 // Commit flushes the data stored in the internal membatch out to persistent 326 // storage, returning any occurred error. 327 func (s *Sync) Commit(dbw ethdb.Batch) error { 328 // Dump the membatch into a database dbw 329 for path, value := range s.membatch.nodes { 330 rawdb.WriteTrieNode(dbw, s.membatch.hashes[path], value) 331 } 332 for hash, value := range s.membatch.codes { 333 rawdb.WriteCode(dbw, hash, value) 334 } 335 // Drop the membatch data and return 336 s.membatch = newSyncMemBatch() 337 return nil 338 } 339 340 // Pending returns the number of state entries currently pending for download. 341 func (s *Sync) Pending() int { 342 return len(s.nodeReqs) + len(s.codeReqs) 343 } 344 345 // schedule inserts a new state retrieval request into the fetch queue. If there 346 // is already a pending request for this node, the new request will be discarded 347 // and only a parent reference added to the old one. 348 func (s *Sync) scheduleNodeRequest(req *nodeRequest) { 349 s.nodeReqs[string(req.path)] = req 350 351 // Schedule the request for future retrieval. 
This queue is shared 352 // by both node requests and code requests. 353 prio := int64(len(req.path)) << 56 // depth >= 128 will never happen, storage leaves will be included in their parents 354 for i := 0; i < 14 && i < len(req.path); i++ { 355 prio |= int64(15-req.path[i]) << (52 - i*4) // 15-nibble => lexicographic order 356 } 357 s.queue.Push(string(req.path), prio) 358 } 359 360 // schedule inserts a new state retrieval request into the fetch queue. If there 361 // is already a pending request for this node, the new request will be discarded 362 // and only a parent reference added to the old one. 363 func (s *Sync) scheduleCodeRequest(req *codeRequest) { 364 // If we're already requesting this node, add a new reference and stop 365 if old, ok := s.codeReqs[req.hash]; ok { 366 old.parents = append(old.parents, req.parents...) 367 return 368 } 369 s.codeReqs[req.hash] = req 370 371 // Schedule the request for future retrieval. This queue is shared 372 // by both node requests and code requests. 373 prio := int64(len(req.path)) << 56 // depth >= 128 will never happen, storage leaves will be included in their parents 374 for i := 0; i < 14 && i < len(req.path); i++ { 375 prio |= int64(15-req.path[i]) << (52 - i*4) // 15-nibble => lexicographic order 376 } 377 s.queue.Push(req.hash, prio) 378 } 379 380 // children retrieves all the missing children of a state trie entry for future 381 // retrieval scheduling. 
382 func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) { 383 // Gather all the children of the node, irrelevant whether known or not 384 type child struct { 385 path []byte 386 node node 387 } 388 var children []child 389 390 switch node := (object).(type) { 391 case *shortNode: 392 key := node.Key 393 if hasTerm(key) { 394 key = key[:len(key)-1] 395 } 396 children = []child{{ 397 node: node.Val, 398 path: append(append([]byte(nil), req.path...), key...), 399 }} 400 case *fullNode: 401 for i := 0; i < 17; i++ { 402 if node.Children[i] != nil { 403 children = append(children, child{ 404 node: node.Children[i], 405 path: append(append([]byte(nil), req.path...), byte(i)), 406 }) 407 } 408 } 409 default: 410 panic(fmt.Sprintf("unknown node: %+v", node)) 411 } 412 // Iterate over the children, and request all unknown ones 413 requests := make([]*nodeRequest, 0, len(children)) 414 for _, child := range children { 415 // Notify any external watcher of a new key/value node 416 if req.callback != nil { 417 if node, ok := (child.node).(valueNode); ok { 418 var paths [][]byte 419 if len(child.path) == 2*common.HashLength { 420 paths = append(paths, hexToKeybytes(child.path)) 421 } else if len(child.path) == 4*common.HashLength { 422 paths = append(paths, hexToKeybytes(child.path[:2*common.HashLength])) 423 paths = append(paths, hexToKeybytes(child.path[2*common.HashLength:])) 424 } 425 if err := req.callback(paths, child.path, node, req.hash, req.path); err != nil { 426 return nil, err 427 } 428 } 429 } 430 // If the child references another node, resolve or schedule 431 if node, ok := (child.node).(hashNode); ok { 432 // Try to resolve the node from the local database 433 if s.membatch.hasNode(child.path) { 434 continue 435 } 436 // If database says duplicate, then at least the trie node is present 437 // and we hold the assumption that it's NOT legacy contract code. 
438 chash := common.BytesToHash(node) 439 if rawdb.HasTrieNode(s.database, chash) { 440 continue 441 } 442 // Locally unknown node, schedule for retrieval 443 requests = append(requests, &nodeRequest{ 444 path: child.path, 445 hash: chash, 446 parent: req, 447 callback: req.callback, 448 }) 449 } 450 } 451 return requests, nil 452 } 453 454 // commit finalizes a retrieval request and stores it into the membatch. If any 455 // of the referencing parent requests complete due to this commit, they are also 456 // committed themselves. 457 func (s *Sync) commitNodeRequest(req *nodeRequest) error { 458 // Write the node content to the membatch 459 s.membatch.nodes[string(req.path)] = req.data 460 s.membatch.hashes[string(req.path)] = req.hash 461 462 delete(s.nodeReqs, string(req.path)) 463 s.fetches[len(req.path)]-- 464 465 // Check parent for completion 466 if req.parent != nil { 467 req.parent.deps-- 468 if req.parent.deps == 0 { 469 if err := s.commitNodeRequest(req.parent); err != nil { 470 return err 471 } 472 } 473 } 474 return nil 475 } 476 477 // commit finalizes a retrieval request and stores it into the membatch. If any 478 // of the referencing parent requests complete due to this commit, they are also 479 // committed themselves. 480 func (s *Sync) commitCodeRequest(req *codeRequest) error { 481 // Write the node content to the membatch 482 s.membatch.codes[req.hash] = req.data 483 delete(s.codeReqs, req.hash) 484 s.fetches[len(req.path)]-- 485 486 // Check all parents for completion 487 for _, parent := range req.parents { 488 parent.deps-- 489 if parent.deps == 0 { 490 if err := s.commitNodeRequest(parent); err != nil { 491 return err 492 } 493 } 494 } 495 return nil 496 }