github.com/carter-ya/go-ethereum@v0.0.0-20230628080049-d2309be3983b/trie/sync.go

// Copyright 2015 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package trie

import (
	"errors"
	"fmt"
	"sync"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/common/prque"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/ethdb"
	"github.com/ethereum/go-ethereum/log"
)

// ErrNotRequested is returned by the trie sync when it's requested to process a
// node it did not request.
var ErrNotRequested = errors.New("not requested")

// ErrAlreadyProcessed is returned by the trie sync when it's requested to process a
// node it already processed previously.
var ErrAlreadyProcessed = errors.New("already processed")

// maxFetchesPerDepth is the maximum number of pending trie nodes per depth. The
// role of this value is to limit the number of trie nodes that get expanded in
// memory if the node was configured with a significant number of peers.
const maxFetchesPerDepth = 16384

// SyncPath is a path tuple identifying a particular trie node either in a single
// trie (account) or a layered trie (account -> storage).
//
// Content-wise the tuple either has 1 element if it addresses a node in a single
// trie or 2 elements if it addresses a node in a stacked trie.
//
// To support addressing arbitrary trie nodes, the path needs to support odd nibble
// lengths. To avoid transferring expanded hex form over the network, the last
// part of the tuple (which needs to index into the middle of a trie) is compact
// encoded. In case of a 2-tuple, the first item is always 32 bytes so that is
// simply binary encoded.
//
// Examples:
//   - Path 0x9  -> {0x19}
//   - Path 0x99 -> {0x0099}
//   - Path 0x01234567890123456789012345678901012345678901234567890123456789019 -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x19}
//   - Path 0x012345678901234567890123456789010123456789012345678901234567890199 -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x0099}
type SyncPath [][]byte

// NewSyncPath converts an expanded trie path from nibble form into a compact
// version that can be sent over the network.
func NewSyncPath(path []byte) SyncPath {
	// If the hash is from the account trie, append a single item, if it
	// is from a storage trie, append a tuple. Note, the length 64 is
	// clashing between account leaf and storage root. It's fine though
	// because having a trie node at 64 depth means a hash collision was
	// found and we're long dead.
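	//
	// Illustrative note (not part of the original file): for the nibble path
	// {0x0, 0x1, 0x2} the single-trie branch below yields
	//
	//	SyncPath{{0x10, 0x12}}
	//
	// while a 65-nibble account+storage path yields the 32-byte account key
	// followed by the compact-encoded one-nibble remainder, matching the
	// SyncPath examples documented above.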
	if len(path) < 64 {
		return SyncPath{hexToCompact(path)}
	}
	return SyncPath{hexToKeybytes(path[:64]), hexToCompact(path[64:])}
}

// nodeRequest represents a scheduled or already in-flight trie node retrieval request.
type nodeRequest struct {
	hash common.Hash // Hash of the trie node to retrieve
	path []byte      // Merkle path leading to this node for prioritization
	data []byte      // Data content of the node, cached until all subtrees complete

	parent   *nodeRequest // Parent state node referencing this entry
	deps     int          // Number of dependencies before allowed to commit this node
	callback LeafCallback // Callback to invoke if a leaf node is reached on this branch
}

// codeRequest represents a scheduled or already in-flight bytecode retrieval request.
type codeRequest struct {
	hash    common.Hash    // Hash of the contract bytecode to retrieve
	path    []byte         // Merkle path leading to this node for prioritization
	data    []byte         // Data content of the node, cached until all subtrees complete
	parents []*nodeRequest // Parent state nodes referencing this entry (notify all upon completion)
}

// NodeSyncResult is a response with the requested trie node along with its node path.
type NodeSyncResult struct {
	Path string // Path of the originally unknown trie node
	Data []byte // Data content of the retrieved trie node
}

// CodeSyncResult is a response with the requested bytecode along with its hash.
type CodeSyncResult struct {
	Hash common.Hash // Hash of the originally unknown bytecode
	Data []byte      // Data content of the retrieved bytecode
}

// syncMemBatch is an in-memory buffer of successfully downloaded but not yet
// persisted data items.
type syncMemBatch struct {
	nodes  map[string][]byte      // In-memory membatch of recently completed nodes
	hashes map[string]common.Hash // Hashes of recently completed nodes
	codes  map[common.Hash][]byte // In-memory membatch of recently completed codes
	size   uint64                 // Estimated batch-size of in-memory data
}

// newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes.
func newSyncMemBatch() *syncMemBatch {
	return &syncMemBatch{
		nodes:  make(map[string][]byte),
		hashes: make(map[string]common.Hash),
		codes:  make(map[common.Hash][]byte),
	}
}

// hasNode reports whether the trie node with the specific path is already cached.
func (batch *syncMemBatch) hasNode(path []byte) bool {
	_, ok := batch.nodes[string(path)]
	return ok
}

// hasCode reports whether the contract code with the specific hash is already cached.
func (batch *syncMemBatch) hasCode(hash common.Hash) bool {
	_, ok := batch.codes[hash]
	return ok
}

// Sync is the main state trie synchronisation scheduler, which provides yet
// unknown trie hashes to retrieve, accepts node data associated with said hashes
// and reconstructs the trie step by step until all is done.
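//
// A typical retrieval loop looks roughly like the sketch below (illustrative
// only, not part of the original file; diskdb is assumed to be an
// ethdb.Database and the peer-side fetch helpers are hypothetical):
//
//	sched := trie.NewSync(root, diskdb, nil)
//	for sched.Pending() > 0 {
//		paths, nodeHashes, codeHashes := sched.Missing(128)
//		for i, path := range paths {
//			blob := fetchNode(nodeHashes[i]) // hypothetical network retrieval
//			if err := sched.ProcessNode(trie.NodeSyncResult{Path: path, Data: blob}); err != nil {
//				// handle err
//			}
//		}
//		for _, hash := range codeHashes {
//			code := fetchCode(hash) // hypothetical network retrieval
//			if err := sched.ProcessCode(trie.CodeSyncResult{Hash: hash, Data: code}); err != nil {
//				// handle err
//			}
//		}
//		batch := diskdb.NewBatch()
//		if err := sched.Commit(batch); err != nil {
//			// handle err
//		}
//		batch.Write()
//	}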
type Sync struct {
	database ethdb.KeyValueReader         // Persistent database to check for existing entries
	membatch *syncMemBatch                // Memory buffer to avoid frequent database writes
	nodeReqs map[string]*nodeRequest      // Pending requests pertaining to a trie node path
	codeReqs map[common.Hash]*codeRequest // Pending requests pertaining to a code hash
	queue    *prque.Prque                 // Priority queue with the pending requests
	fetches  map[int]int                  // Number of active fetches per trie node depth
}

// NewSync creates a new trie data download scheduler.
func NewSync(root common.Hash, database ethdb.KeyValueReader, callback LeafCallback) *Sync {
	ts := &Sync{
		database: database,
		membatch: newSyncMemBatch(),
		nodeReqs: make(map[string]*nodeRequest),
		codeReqs: make(map[common.Hash]*codeRequest),
		queue:    prque.New(nil),
		fetches:  make(map[int]int),
	}
	ts.AddSubTrie(root, nil, common.Hash{}, nil, callback)
	return ts
}

// AddSubTrie registers a new trie to the sync code, rooted at the designated
// parent for completion tracking. The given path is a unique node path in
// hex format and contains the full parent path if it is a layered trie node.
func (s *Sync) AddSubTrie(root common.Hash, path []byte, parent common.Hash, parentPath []byte, callback LeafCallback) {
	// Short circuit if the trie is empty or already known
	if root == emptyRoot {
		return
	}
	if s.membatch.hasNode(path) {
		return
	}
	if rawdb.HasTrieNode(s.database, root) {
		return
	}
	// Assemble the new sub-trie sync request
	req := &nodeRequest{
		hash:     root,
		path:     path,
		callback: callback,
	}
	// If this sub-trie has a designated parent, link them together
	if parent != (common.Hash{}) {
		ancestor := s.nodeReqs[string(parentPath)]
		if ancestor == nil {
			panic(fmt.Sprintf("sub-trie ancestor not found: %x", parent))
		}
		ancestor.deps++
		req.parent = ancestor
	}
	s.scheduleNodeRequest(req)
}

// AddCodeEntry schedules the direct retrieval of a contract code that should not
// be interpreted as a trie node, but rather accepted and stored into the database
// as is.
func (s *Sync) AddCodeEntry(hash common.Hash, path []byte, parent common.Hash, parentPath []byte) {
	// Short circuit if the entry is empty or already known
	if hash == emptyState {
		return
	}
	if s.membatch.hasCode(hash) {
		return
	}
	// If database says duplicate, the blob is present for sure.
	// Note we only check the existence with the new code scheme, since
	// fast sync is expected to run with a fresh new node. Even if the
	// code exists in the legacy format, fetch and store it with the
	// new scheme anyway.
	if rawdb.HasCodeWithPrefix(s.database, hash) {
		return
	}
	// Assemble the new code retrieval request
	req := &codeRequest{
		path: path,
		hash: hash,
	}
	// If this code entry has a designated parent, link them together
	if parent != (common.Hash{}) {
		ancestor := s.nodeReqs[string(parentPath)] // the parent of codereq can ONLY be nodereq
		if ancestor == nil {
			panic(fmt.Sprintf("raw-entry ancestor not found: %x", parent))
		}
		ancestor.deps++
		req.parents = append(req.parents, ancestor)
	}
	s.scheduleCodeRequest(req)
}
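
// Illustrative sketch (not part of the original file): a LeafCallback in the
// spirit of the account callback used by the state sync in core/state, which
// expands every account leaf into its storage trie and contract code. Here
// sched is the *Sync instance, onSlot is a hypothetical storage-leaf callback,
// and types/rlp are the go-ethereum core/types and rlp packages:
//
//	onAccount := func(keys [][]byte, path []byte, leaf []byte, parent common.Hash, parentPath []byte) error {
//		var obj types.StateAccount
//		if err := rlp.DecodeBytes(leaf, &obj); err != nil {
//			return err
//		}
//		sched.AddSubTrie(obj.Root, path, parent, parentPath, onSlot)
//		sched.AddCodeEntry(common.BytesToHash(obj.CodeHash), path, parent, parentPath)
//		return nil
//	}
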
// Missing retrieves the known missing nodes from the trie for retrieval. To aid
// both eth/6x style fast sync and snap/1x style state sync, the paths of trie
// nodes are returned too, as well as a separate hash list for codes. A max of
// zero means no limit on the number of returned items.
func (s *Sync) Missing(max int) ([]string, []common.Hash, []common.Hash) {
	var (
		nodePaths  []string
		nodeHashes []common.Hash
		codeHashes []common.Hash
	)
	for !s.queue.Empty() && (max == 0 || len(nodeHashes)+len(codeHashes) < max) {
		// Retrieve the next item in line
		item, prio := s.queue.Peek()

		// If we have too many already-pending tasks for this depth, throttle
		depth := int(prio >> 56)
		if s.fetches[depth] > maxFetchesPerDepth {
			break
		}
		// Item is allowed to be scheduled, add it to the task list
		s.queue.Pop()
		s.fetches[depth]++

		switch item := item.(type) {
		case common.Hash:
			codeHashes = append(codeHashes, item)
		case string:
			req, ok := s.nodeReqs[item]
			if !ok {
				log.Error("Missing node request", "path", item)
				continue // System very wrong, shouldn't happen
			}
			nodePaths = append(nodePaths, item)
			nodeHashes = append(nodeHashes, req.hash)
		}
	}
	return nodePaths, nodeHashes, codeHashes
}

// ProcessCode injects the received data for the requested item. Note it can
// happen that a single response commits two pending requests (e.g. there are
// two requests, one for code and one for node, but the hash is the same). In
// this case the second response for the same hash will be treated as a
// "non-requested" item or an "already-processed" item, but there is no downside.
func (s *Sync) ProcessCode(result CodeSyncResult) error {
	// If the code was not requested or it's already processed, bail out
	req := s.codeReqs[result.Hash]
	if req == nil {
		return ErrNotRequested
	}
	if req.data != nil {
		return ErrAlreadyProcessed
	}
	req.data = result.Data
	return s.commitCodeRequest(req)
}

// ProcessNode injects the received data for the requested item. Note it can
// happen that a single response commits two pending requests (e.g. there are
// two requests, one for code and one for node, but the hash is the same). In
// this case the second response for the same hash will be treated as a
// "non-requested" item or an "already-processed" item, but there is no downside.
func (s *Sync) ProcessNode(result NodeSyncResult) error {
	// If the trie node was not requested or it's already processed, bail out
	req := s.nodeReqs[result.Path]
	if req == nil {
		return ErrNotRequested
	}
	if req.data != nil {
		return ErrAlreadyProcessed
	}
	// Decode the node data content and update the request
	node, err := decodeNode(req.hash.Bytes(), result.Data)
	if err != nil {
		return err
	}
	req.data = result.Data

	// Create and schedule a request for all the children nodes
	requests, err := s.children(req, node)
	if err != nil {
		return err
	}
	if len(requests) == 0 && req.deps == 0 {
		s.commitNodeRequest(req)
	} else {
		req.deps += len(requests)
		for _, child := range requests {
			s.scheduleNodeRequest(child)
		}
	}
	return nil
}
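
// Worked illustration of the dependency tracking in ProcessNode above (not part
// of the original file): if the delivered node is a fullNode with three children
// that are neither in the membatch nor on disk, children returns three fresh
// requests, so the parent is parked with
//
//	req.deps += len(requests) // deps: 0 -> 3
//
// and the children are scheduled instead. As each child later commits,
// commitNodeRequest decrements the parent's deps; once the counter reaches zero
// the parent itself is committed and the countdown cascades one level further up.
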
// Commit flushes the data stored in the internal membatch out to persistent
// storage, returning any error that occurred.
func (s *Sync) Commit(dbw ethdb.Batch) error {
	// Dump the membatch into the given database batch
	for path, value := range s.membatch.nodes {
		rawdb.WriteTrieNode(dbw, s.membatch.hashes[path], value)
	}
	for hash, value := range s.membatch.codes {
		rawdb.WriteCode(dbw, hash, value)
	}
	// Drop the membatch data and return
	s.membatch = newSyncMemBatch()
	return nil
}

// MemSize returns an estimated size (in bytes) of the data held in the membatch.
func (s *Sync) MemSize() uint64 {
	return s.membatch.size
}

// Pending returns the number of state entries currently pending for download.
func (s *Sync) Pending() int {
	return len(s.nodeReqs) + len(s.codeReqs)
}

// scheduleNodeRequest inserts a new state retrieval request into the fetch
// queue, keyed by its node path.
func (s *Sync) scheduleNodeRequest(req *nodeRequest) {
	s.nodeReqs[string(req.path)] = req

	// Schedule the request for future retrieval. This queue is shared
	// by both node requests and code requests.
	prio := int64(len(req.path)) << 56 // depth >= 128 will never happen, storage leaves will be included in their parents
	for i := 0; i < 14 && i < len(req.path); i++ {
		prio |= int64(15-req.path[i]) << (52 - i*4) // 15-nibble => lexicographic order
	}
	s.queue.Push(string(req.path), prio)
}

// scheduleCodeRequest inserts a new state retrieval request into the fetch
// queue. If there is already a pending request for this code hash, the new
// request will be discarded and only a parent reference added to the old one.
func (s *Sync) scheduleCodeRequest(req *codeRequest) {
	// If we're already requesting this code, add a new reference and stop
	if old, ok := s.codeReqs[req.hash]; ok {
		old.parents = append(old.parents, req.parents...)
		return
	}
	s.codeReqs[req.hash] = req

	// Schedule the request for future retrieval. This queue is shared
	// by both node requests and code requests.
	prio := int64(len(req.path)) << 56 // depth >= 128 will never happen, storage leaves will be included in their parents
	for i := 0; i < 14 && i < len(req.path); i++ {
		prio |= int64(15-req.path[i]) << (52 - i*4) // 15-nibble => lexicographic order
	}
	s.queue.Push(req.hash, prio)
}
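
// Worked example of the priority packing used above (illustrative, not part of
// the original file): a request at the nibble path {0x3, 0xa, 0x7} is pushed
// with
//
//	prio = 3<<56 | (15-0x3)<<52 | (15-0xa)<<48 | (15-0x7)<<44
//
// so the path depth occupies the top byte (which Missing recovers as prio>>56
// for the per-depth throttling) and the complemented nibbles below it order
// requests of equal depth lexicographically by path.
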
// children retrieves all the missing children of a state trie entry for future
// retrieval scheduling.
func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) {
	// Gather all the children of the node, irrespective of whether they are known or not
	type childNode struct {
		path []byte
		node node
	}
	var children []childNode

	switch node := (object).(type) {
	case *shortNode:
		key := node.Key
		if hasTerm(key) {
			key = key[:len(key)-1]
		}
		children = []childNode{{
			node: node.Val,
			path: append(append([]byte(nil), req.path...), key...),
		}}
	case *fullNode:
		for i := 0; i < 17; i++ {
			if node.Children[i] != nil {
				children = append(children, childNode{
					node: node.Children[i],
					path: append(append([]byte(nil), req.path...), byte(i)),
				})
			}
		}
	default:
		panic(fmt.Sprintf("unknown node: %+v", node))
	}
	// Iterate over the children, and request all unknown ones
	var (
		missing = make(chan *nodeRequest, len(children))
		pending sync.WaitGroup
	)
	for _, child := range children {
		// Notify any external watcher of a new key/value node
		if req.callback != nil {
			if node, ok := (child.node).(valueNode); ok {
				var paths [][]byte
				if len(child.path) == 2*common.HashLength {
					paths = append(paths, hexToKeybytes(child.path))
				} else if len(child.path) == 4*common.HashLength {
					paths = append(paths, hexToKeybytes(child.path[:2*common.HashLength]))
					paths = append(paths, hexToKeybytes(child.path[2*common.HashLength:]))
				}
				if err := req.callback(paths, child.path, node, req.hash, req.path); err != nil {
					return nil, err
				}
			}
		}
		// If the child references another node, resolve or schedule
		if node, ok := (child.node).(hashNode); ok {
			// Try to resolve the node from the local database
			if s.membatch.hasNode(child.path) {
				continue
			}
			// Check the presence of children concurrently
			pending.Add(1)
			go func(child childNode) {
				defer pending.Done()

				// If database says duplicate, then at least the trie node is present
				// and we hold the assumption that it's NOT legacy contract code.
				chash := common.BytesToHash(node)
				if rawdb.HasTrieNode(s.database, chash) {
					return
				}
				// Locally unknown node, schedule for retrieval
				missing <- &nodeRequest{
					path:     child.path,
					hash:     chash,
					parent:   req,
					callback: req.callback,
				}
			}(child)
		}
	}
	pending.Wait()

	requests := make([]*nodeRequest, 0, len(children))
	for done := false; !done; {
		select {
		case miss := <-missing:
			requests = append(requests, miss)
		default:
			done = true
		}
	}
	return requests, nil
}

// commitNodeRequest finalizes a retrieval request and stores it into the
// membatch. If any of the referencing parent requests complete due to this
// commit, they are also committed themselves.
func (s *Sync) commitNodeRequest(req *nodeRequest) error {
	// Write the node content to the membatch
	s.membatch.nodes[string(req.path)] = req.data
	s.membatch.hashes[string(req.path)] = req.hash
	// The size tracking refers to the db-batch, not the in-memory data.
	// Therefore, we ignore the req.path, and account only for the hash+data
	// which eventually is written to db.
	s.membatch.size += common.HashLength + uint64(len(req.data))
	delete(s.nodeReqs, string(req.path))
	s.fetches[len(req.path)]--

	// Check parent for completion
	if req.parent != nil {
		req.parent.deps--
		if req.parent.deps == 0 {
			if err := s.commitNodeRequest(req.parent); err != nil {
				return err
			}
		}
	}
	return nil
}

// commitCodeRequest finalizes a retrieval request and stores it into the
// membatch. If any of the referencing parent requests complete due to this
// commit, they are also committed themselves.
func (s *Sync) commitCodeRequest(req *codeRequest) error {
	// Write the code content to the membatch
	s.membatch.codes[req.hash] = req.data
	s.membatch.size += common.HashLength + uint64(len(req.data))
	delete(s.codeReqs, req.hash)
	s.fetches[len(req.path)]--

	// Check all parents for completion
	for _, parent := range req.parents {
		parent.deps--
		if parent.deps == 0 {
			if err := s.commitNodeRequest(parent); err != nil {
				return err
			}
		}
	}
	return nil
}
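
// Note on leaf callback keys (illustrative, not part of the original file): in
// children above, a value node at a 64-nibble path is an account leaf and is
// reported to the LeafCallback with a single 32-byte key, while one at a
// 128-nibble path is a storage leaf and is reported with two keys:
//
//	keys = [][]byte{accountKey}          // len(child.path) == 2*common.HashLength
//	keys = [][]byte{accountKey, slotKey} // len(child.path) == 4*common.HashLength
//
// where accountKey and slotKey are the keybytes forms of the two 64-nibble
// halves of the path.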