github.com/bcnmy/go-ethereum@v1.10.27/trie/sync.go (about) 1 // Copyright 2015 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package trie 18 19 import ( 20 "errors" 21 "fmt" 22 "sync" 23 24 "github.com/ethereum/go-ethereum/common" 25 "github.com/ethereum/go-ethereum/common/prque" 26 "github.com/ethereum/go-ethereum/core/rawdb" 27 "github.com/ethereum/go-ethereum/ethdb" 28 "github.com/ethereum/go-ethereum/log" 29 ) 30 31 // ErrNotRequested is returned by the trie sync when it's requested to process a 32 // node it did not request. 33 var ErrNotRequested = errors.New("not requested") 34 35 // ErrAlreadyProcessed is returned by the trie sync when it's requested to process a 36 // node it already processed previously. 37 var ErrAlreadyProcessed = errors.New("already processed") 38 39 // maxFetchesPerDepth is the maximum number of pending trie nodes per depth. The 40 // role of this value is to limit the number of trie nodes that get expanded in 41 // memory if the node was configured with a significant number of peers. 42 const maxFetchesPerDepth = 16384 43 44 // SyncPath is a path tuple identifying a particular trie node either in a single 45 // trie (account) or a layered trie (account -> storage). 46 // 47 // Content wise the tuple either has 1 element if it addresses a node in a single 48 // trie or 2 elements if it addresses a node in a stacked trie. 49 // 50 // To support aiming arbitrary trie nodes, the path needs to support odd nibble 51 // lengths. To avoid transferring expanded hex form over the network, the last 52 // part of the tuple (which needs to index into the middle of a trie) is compact 53 // encoded. In case of a 2-tuple, the first item is always 32 bytes so that is 54 // simple binary encoded. 55 // 56 // Examples: 57 // - Path 0x9 -> {0x19} 58 // - Path 0x99 -> {0x0099} 59 // - Path 0x01234567890123456789012345678901012345678901234567890123456789019 -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x19} 60 // - Path 0x012345678901234567890123456789010123456789012345678901234567890199 -> {0x0123456789012345678901234567890101234567890123456789012345678901, 0x0099} 61 type SyncPath [][]byte 62 63 // NewSyncPath converts an expanded trie path from nibble form into a compact 64 // version that can be sent over the network. 65 func NewSyncPath(path []byte) SyncPath { 66 // If the hash is from the account trie, append a single item, if it 67 // is from the a storage trie, append a tuple. Note, the length 64 is 68 // clashing between account leaf and storage root. It's fine though 69 // because having a trie node at 64 depth means a hash collision was 70 // found and we're long dead. 71 if len(path) < 64 { 72 return SyncPath{hexToCompact(path)} 73 } 74 return SyncPath{hexToKeybytes(path[:64]), hexToCompact(path[64:])} 75 } 76 77 // nodeRequest represents a scheduled or already in-flight trie node retrieval request. 78 type nodeRequest struct { 79 hash common.Hash // Hash of the trie node to retrieve 80 path []byte // Merkle path leading to this node for prioritization 81 data []byte // Data content of the node, cached until all subtrees complete 82 83 parent *nodeRequest // Parent state node referencing this entry 84 deps int // Number of dependencies before allowed to commit this node 85 callback LeafCallback // Callback to invoke if a leaf node it reached on this branch 86 } 87 88 // codeRequest represents a scheduled or already in-flight bytecode retrieval request. 89 type codeRequest struct { 90 hash common.Hash // Hash of the contract bytecode to retrieve 91 path []byte // Merkle path leading to this node for prioritization 92 data []byte // Data content of the node, cached until all subtrees complete 93 parents []*nodeRequest // Parent state nodes referencing this entry (notify all upon completion) 94 } 95 96 // NodeSyncResult is a response with requested trie node along with its node path. 97 type NodeSyncResult struct { 98 Path string // Path of the originally unknown trie node 99 Data []byte // Data content of the retrieved trie node 100 } 101 102 // CodeSyncResult is a response with requested bytecode along with its hash. 103 type CodeSyncResult struct { 104 Hash common.Hash // Hash the originally unknown bytecode 105 Data []byte // Data content of the retrieved bytecode 106 } 107 108 // syncMemBatch is an in-memory buffer of successfully downloaded but not yet 109 // persisted data items. 110 type syncMemBatch struct { 111 nodes map[string][]byte // In-memory membatch of recently completed nodes 112 hashes map[string]common.Hash // Hashes of recently completed nodes 113 codes map[common.Hash][]byte // In-memory membatch of recently completed codes 114 } 115 116 // newSyncMemBatch allocates a new memory-buffer for not-yet persisted trie nodes. 117 func newSyncMemBatch() *syncMemBatch { 118 return &syncMemBatch{ 119 nodes: make(map[string][]byte), 120 hashes: make(map[string]common.Hash), 121 codes: make(map[common.Hash][]byte), 122 } 123 } 124 125 // hasNode reports the trie node with specific path is already cached. 126 func (batch *syncMemBatch) hasNode(path []byte) bool { 127 _, ok := batch.nodes[string(path)] 128 return ok 129 } 130 131 // hasCode reports the contract code with specific hash is already cached. 132 func (batch *syncMemBatch) hasCode(hash common.Hash) bool { 133 _, ok := batch.codes[hash] 134 return ok 135 } 136 137 // Sync is the main state trie synchronisation scheduler, which provides yet 138 // unknown trie hashes to retrieve, accepts node data associated with said hashes 139 // and reconstructs the trie step by step until all is done. 140 type Sync struct { 141 database ethdb.KeyValueReader // Persistent database to check for existing entries 142 membatch *syncMemBatch // Memory buffer to avoid frequent database writes 143 nodeReqs map[string]*nodeRequest // Pending requests pertaining to a trie node path 144 codeReqs map[common.Hash]*codeRequest // Pending requests pertaining to a code hash 145 queue *prque.Prque // Priority queue with the pending requests 146 fetches map[int]int // Number of active fetches per trie node depth 147 } 148 149 // NewSync creates a new trie data download scheduler. 150 func NewSync(root common.Hash, database ethdb.KeyValueReader, callback LeafCallback) *Sync { 151 ts := &Sync{ 152 database: database, 153 membatch: newSyncMemBatch(), 154 nodeReqs: make(map[string]*nodeRequest), 155 codeReqs: make(map[common.Hash]*codeRequest), 156 queue: prque.New(nil), 157 fetches: make(map[int]int), 158 } 159 ts.AddSubTrie(root, nil, common.Hash{}, nil, callback) 160 return ts 161 } 162 163 // AddSubTrie registers a new trie to the sync code, rooted at the designated 164 // parent for completion tracking. The given path is a unique node path in 165 // hex format and contain all the parent path if it's layered trie node. 166 func (s *Sync) AddSubTrie(root common.Hash, path []byte, parent common.Hash, parentPath []byte, callback LeafCallback) { 167 // Short circuit if the trie is empty or already known 168 if root == emptyRoot { 169 return 170 } 171 if s.membatch.hasNode(path) { 172 return 173 } 174 if rawdb.HasTrieNode(s.database, root) { 175 return 176 } 177 // Assemble the new sub-trie sync request 178 req := &nodeRequest{ 179 hash: root, 180 path: path, 181 callback: callback, 182 } 183 // If this sub-trie has a designated parent, link them together 184 if parent != (common.Hash{}) { 185 ancestor := s.nodeReqs[string(parentPath)] 186 if ancestor == nil { 187 panic(fmt.Sprintf("sub-trie ancestor not found: %x", parent)) 188 } 189 ancestor.deps++ 190 req.parent = ancestor 191 } 192 s.scheduleNodeRequest(req) 193 } 194 195 // AddCodeEntry schedules the direct retrieval of a contract code that should not 196 // be interpreted as a trie node, but rather accepted and stored into the database 197 // as is. 198 func (s *Sync) AddCodeEntry(hash common.Hash, path []byte, parent common.Hash, parentPath []byte) { 199 // Short circuit if the entry is empty or already known 200 if hash == emptyState { 201 return 202 } 203 if s.membatch.hasCode(hash) { 204 return 205 } 206 // If database says duplicate, the blob is present for sure. 207 // Note we only check the existence with new code scheme, fast 208 // sync is expected to run with a fresh new node. Even there 209 // exists the code with legacy format, fetch and store with 210 // new scheme anyway. 211 if rawdb.HasCodeWithPrefix(s.database, hash) { 212 return 213 } 214 // Assemble the new sub-trie sync request 215 req := &codeRequest{ 216 path: path, 217 hash: hash, 218 } 219 // If this sub-trie has a designated parent, link them together 220 if parent != (common.Hash{}) { 221 ancestor := s.nodeReqs[string(parentPath)] // the parent of codereq can ONLY be nodereq 222 if ancestor == nil { 223 panic(fmt.Sprintf("raw-entry ancestor not found: %x", parent)) 224 } 225 ancestor.deps++ 226 req.parents = append(req.parents, ancestor) 227 } 228 s.scheduleCodeRequest(req) 229 } 230 231 // Missing retrieves the known missing nodes from the trie for retrieval. To aid 232 // both eth/6x style fast sync and snap/1x style state sync, the paths of trie 233 // nodes are returned too, as well as separate hash list for codes. 234 func (s *Sync) Missing(max int) ([]string, []common.Hash, []common.Hash) { 235 var ( 236 nodePaths []string 237 nodeHashes []common.Hash 238 codeHashes []common.Hash 239 ) 240 for !s.queue.Empty() && (max == 0 || len(nodeHashes)+len(codeHashes) < max) { 241 // Retrieve the next item in line 242 item, prio := s.queue.Peek() 243 244 // If we have too many already-pending tasks for this depth, throttle 245 depth := int(prio >> 56) 246 if s.fetches[depth] > maxFetchesPerDepth { 247 break 248 } 249 // Item is allowed to be scheduled, add it to the task list 250 s.queue.Pop() 251 s.fetches[depth]++ 252 253 switch item := item.(type) { 254 case common.Hash: 255 codeHashes = append(codeHashes, item) 256 case string: 257 req, ok := s.nodeReqs[item] 258 if !ok { 259 log.Error("Missing node request", "path", item) 260 continue // System very wrong, shouldn't happen 261 } 262 nodePaths = append(nodePaths, item) 263 nodeHashes = append(nodeHashes, req.hash) 264 } 265 } 266 return nodePaths, nodeHashes, codeHashes 267 } 268 269 // ProcessCode injects the received data for requested item. Note it can 270 // happpen that the single response commits two pending requests(e.g. 271 // there are two requests one for code and one for node but the hash 272 // is same). In this case the second response for the same hash will 273 // be treated as "non-requested" item or "already-processed" item but 274 // there is no downside. 275 func (s *Sync) ProcessCode(result CodeSyncResult) error { 276 // If the code was not requested or it's already processed, bail out 277 req := s.codeReqs[result.Hash] 278 if req == nil { 279 return ErrNotRequested 280 } 281 if req.data != nil { 282 return ErrAlreadyProcessed 283 } 284 req.data = result.Data 285 return s.commitCodeRequest(req) 286 } 287 288 // ProcessNode injects the received data for requested item. Note it can 289 // happen that the single response commits two pending requests(e.g. 290 // there are two requests one for code and one for node but the hash 291 // is same). In this case the second response for the same hash will 292 // be treated as "non-requested" item or "already-processed" item but 293 // there is no downside. 294 func (s *Sync) ProcessNode(result NodeSyncResult) error { 295 // If the trie node was not requested or it's already processed, bail out 296 req := s.nodeReqs[result.Path] 297 if req == nil { 298 return ErrNotRequested 299 } 300 if req.data != nil { 301 return ErrAlreadyProcessed 302 } 303 // Decode the node data content and update the request 304 node, err := decodeNode(req.hash.Bytes(), result.Data) 305 if err != nil { 306 return err 307 } 308 req.data = result.Data 309 310 // Create and schedule a request for all the children nodes 311 requests, err := s.children(req, node) 312 if err != nil { 313 return err 314 } 315 if len(requests) == 0 && req.deps == 0 { 316 s.commitNodeRequest(req) 317 } else { 318 req.deps += len(requests) 319 for _, child := range requests { 320 s.scheduleNodeRequest(child) 321 } 322 } 323 return nil 324 } 325 326 // Commit flushes the data stored in the internal membatch out to persistent 327 // storage, returning any occurred error. 328 func (s *Sync) Commit(dbw ethdb.Batch) error { 329 // Dump the membatch into a database dbw 330 for path, value := range s.membatch.nodes { 331 rawdb.WriteTrieNode(dbw, s.membatch.hashes[path], value) 332 } 333 for hash, value := range s.membatch.codes { 334 rawdb.WriteCode(dbw, hash, value) 335 } 336 // Drop the membatch data and return 337 s.membatch = newSyncMemBatch() 338 return nil 339 } 340 341 // Pending returns the number of state entries currently pending for download. 342 func (s *Sync) Pending() int { 343 return len(s.nodeReqs) + len(s.codeReqs) 344 } 345 346 // schedule inserts a new state retrieval request into the fetch queue. If there 347 // is already a pending request for this node, the new request will be discarded 348 // and only a parent reference added to the old one. 349 func (s *Sync) scheduleNodeRequest(req *nodeRequest) { 350 s.nodeReqs[string(req.path)] = req 351 352 // Schedule the request for future retrieval. This queue is shared 353 // by both node requests and code requests. 354 prio := int64(len(req.path)) << 56 // depth >= 128 will never happen, storage leaves will be included in their parents 355 for i := 0; i < 14 && i < len(req.path); i++ { 356 prio |= int64(15-req.path[i]) << (52 - i*4) // 15-nibble => lexicographic order 357 } 358 s.queue.Push(string(req.path), prio) 359 } 360 361 // schedule inserts a new state retrieval request into the fetch queue. If there 362 // is already a pending request for this node, the new request will be discarded 363 // and only a parent reference added to the old one. 364 func (s *Sync) scheduleCodeRequest(req *codeRequest) { 365 // If we're already requesting this node, add a new reference and stop 366 if old, ok := s.codeReqs[req.hash]; ok { 367 old.parents = append(old.parents, req.parents...) 368 return 369 } 370 s.codeReqs[req.hash] = req 371 372 // Schedule the request for future retrieval. This queue is shared 373 // by both node requests and code requests. 374 prio := int64(len(req.path)) << 56 // depth >= 128 will never happen, storage leaves will be included in their parents 375 for i := 0; i < 14 && i < len(req.path); i++ { 376 prio |= int64(15-req.path[i]) << (52 - i*4) // 15-nibble => lexicographic order 377 } 378 s.queue.Push(req.hash, prio) 379 } 380 381 // children retrieves all the missing children of a state trie entry for future 382 // retrieval scheduling. 383 func (s *Sync) children(req *nodeRequest, object node) ([]*nodeRequest, error) { 384 // Gather all the children of the node, irrelevant whether known or not 385 type childNode struct { 386 path []byte 387 node node 388 } 389 var children []childNode 390 391 switch node := (object).(type) { 392 case *shortNode: 393 key := node.Key 394 if hasTerm(key) { 395 key = key[:len(key)-1] 396 } 397 children = []childNode{{ 398 node: node.Val, 399 path: append(append([]byte(nil), req.path...), key...), 400 }} 401 case *fullNode: 402 for i := 0; i < 17; i++ { 403 if node.Children[i] != nil { 404 children = append(children, childNode{ 405 node: node.Children[i], 406 path: append(append([]byte(nil), req.path...), byte(i)), 407 }) 408 } 409 } 410 default: 411 panic(fmt.Sprintf("unknown node: %+v", node)) 412 } 413 // Iterate over the children, and request all unknown ones 414 var ( 415 missing = make(chan *nodeRequest, len(children)) 416 pending sync.WaitGroup 417 ) 418 for _, child := range children { 419 // Notify any external watcher of a new key/value node 420 if req.callback != nil { 421 if node, ok := (child.node).(valueNode); ok { 422 var paths [][]byte 423 if len(child.path) == 2*common.HashLength { 424 paths = append(paths, hexToKeybytes(child.path)) 425 } else if len(child.path) == 4*common.HashLength { 426 paths = append(paths, hexToKeybytes(child.path[:2*common.HashLength])) 427 paths = append(paths, hexToKeybytes(child.path[2*common.HashLength:])) 428 } 429 if err := req.callback(paths, child.path, node, req.hash, req.path); err != nil { 430 return nil, err 431 } 432 } 433 } 434 // If the child references another node, resolve or schedule 435 if node, ok := (child.node).(hashNode); ok { 436 // Try to resolve the node from the local database 437 if s.membatch.hasNode(child.path) { 438 continue 439 } 440 // Check the presence of children concurrently 441 pending.Add(1) 442 go func(child childNode) { 443 defer pending.Done() 444 445 // If database says duplicate, then at least the trie node is present 446 // and we hold the assumption that it's NOT legacy contract code. 447 chash := common.BytesToHash(node) 448 if rawdb.HasTrieNode(s.database, chash) { 449 return 450 } 451 // Locally unknown node, schedule for retrieval 452 missing <- &nodeRequest{ 453 path: child.path, 454 hash: chash, 455 parent: req, 456 callback: req.callback, 457 } 458 }(child) 459 } 460 } 461 pending.Wait() 462 463 requests := make([]*nodeRequest, 0, len(children)) 464 for done := false; !done; { 465 select { 466 case miss := <-missing: 467 requests = append(requests, miss) 468 default: 469 done = true 470 } 471 } 472 return requests, nil 473 } 474 475 // commit finalizes a retrieval request and stores it into the membatch. If any 476 // of the referencing parent requests complete due to this commit, they are also 477 // committed themselves. 478 func (s *Sync) commitNodeRequest(req *nodeRequest) error { 479 // Write the node content to the membatch 480 s.membatch.nodes[string(req.path)] = req.data 481 s.membatch.hashes[string(req.path)] = req.hash 482 483 delete(s.nodeReqs, string(req.path)) 484 s.fetches[len(req.path)]-- 485 486 // Check parent for completion 487 if req.parent != nil { 488 req.parent.deps-- 489 if req.parent.deps == 0 { 490 if err := s.commitNodeRequest(req.parent); err != nil { 491 return err 492 } 493 } 494 } 495 return nil 496 } 497 498 // commit finalizes a retrieval request and stores it into the membatch. If any 499 // of the referencing parent requests complete due to this commit, they are also 500 // committed themselves. 501 func (s *Sync) commitCodeRequest(req *codeRequest) error { 502 // Write the node content to the membatch 503 s.membatch.codes[req.hash] = req.data 504 delete(s.codeReqs, req.hash) 505 s.fetches[len(req.path)]-- 506 507 // Check all parents for completion 508 for _, parent := range req.parents { 509 parent.deps-- 510 if parent.deps == 0 { 511 if err := s.commitNodeRequest(parent); err != nil { 512 return err 513 } 514 } 515 } 516 return nil 517 }