github.com/beyonderyue/gochain@v2.2.26+incompatible/eth/downloader/statesync.go (about) 1 // Copyright 2017 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package downloader 18 19 import ( 20 "context" 21 "fmt" 22 "hash" 23 "sync" 24 "time" 25 26 "go.opencensus.io/trace" 27 28 "github.com/gochain-io/gochain/common" 29 "github.com/gochain-io/gochain/core/rawdb" 30 "github.com/gochain-io/gochain/core/state" 31 "github.com/gochain-io/gochain/crypto/sha3" 32 "github.com/gochain-io/gochain/ethdb" 33 "github.com/gochain-io/gochain/log" 34 "github.com/gochain-io/gochain/trie" 35 ) 36 37 // stateReq represents a batch of state fetch requests grouped together into 38 // a single data retrieval network packet. 39 type stateReq struct { 40 items []common.Hash // Hashes of the state items to download 41 tasks map[common.Hash]*stateTask // Download tasks to track previous attempts 42 timeout time.Duration // Maximum round trip time for this to complete 43 timer *time.Timer // Timer to fire when the RTT timeout expires 44 peer *peerConnection // Peer that we're requesting from 45 response [][]byte // Response data of the peer (nil for timeouts) 46 dropped bool // Flag whether the peer dropped off early 47 } 48 49 // timedOut returns if this request timed out. 50 func (req *stateReq) timedOut() bool { 51 return req.response == nil 52 } 53 54 // stateSyncStats is a collection of progress stats to report during a state trie 55 // sync to RPC requests as well as to display in user logs. 56 type stateSyncStats struct { 57 processed uint64 // Number of state entries processed 58 duplicate uint64 // Number of state entries downloaded twice 59 unexpected uint64 // Number of non-requested state entries received 60 pending uint64 // Number of still pending state entries 61 } 62 63 // syncState starts downloading state with the given root hash. 64 func (d *Downloader) syncState(root common.Hash) *stateSync { 65 s := newStateSync(d, root) 66 select { 67 case d.stateSyncStart <- s: 68 case <-d.quitCh: 69 s.err = errCancelStateFetch 70 close(s.done) 71 } 72 return s 73 } 74 75 // stateFetcher manages the active state sync and accepts requests 76 // on its behalf. 77 func (d *Downloader) stateFetcher() { 78 for { 79 select { 80 case s := <-d.stateSyncStart: 81 for next := s; next != nil; { 82 next = d.runStateSync(next) 83 } 84 case <-d.stateCh: 85 // Ignore state responses while no sync is running. 86 case <-d.quitCh: 87 return 88 } 89 } 90 } 91 92 // runStateSync runs a state synchronisation until it completes or another root 93 // hash is requested to be switched over to. 94 func (d *Downloader) runStateSync(s *stateSync) *stateSync { 95 var ( 96 active = make(map[string]*stateReq) // Currently in-flight requests 97 finished []*stateReq // Completed or failed requests 98 timeout = make(chan *stateReq) // Timed out active requests 99 ) 100 defer func() { 101 // Cancel active request timers on exit. Also set peers to idle so they're 102 // available for the next sync. 103 for _, req := range active { 104 req.timer.Stop() 105 req.peer.SetNodeDataIdle(len(req.items)) 106 } 107 }() 108 // Run the state sync. 109 go s.run() 110 defer s.Cancel() 111 112 // Listen for peer departure events to cancel assigned tasks 113 peerDrop := make(chan *peerConnection, 1024) 114 peerSub := s.d.peers.SubscribePeerDrops(peerDrop) 115 defer peerSub.Unsubscribe() 116 117 for { 118 // Enable sending of the first buffered element if there is one. 119 var ( 120 deliverReq *stateReq 121 deliverReqCh chan *stateReq 122 ) 123 if len(finished) > 0 { 124 deliverReq = finished[0] 125 deliverReqCh = s.deliver 126 } 127 128 select { 129 // The stateSync lifecycle: 130 case next := <-d.stateSyncStart: 131 return next 132 133 case <-s.done: 134 return nil 135 136 // Send the next finished request to the current sync: 137 case deliverReqCh <- deliverReq: 138 // Shift out the first request, but also set the emptied slot to nil for GC 139 copy(finished, finished[1:]) 140 finished[len(finished)-1] = nil 141 finished = finished[:len(finished)-1] 142 143 // Handle incoming state packs: 144 case pack := <-d.stateCh: 145 // Discard any data not requested (or previously timed out) 146 req := active[pack.PeerId()] 147 if req == nil { 148 log.Debug("Unrequested node data", "peer", pack.PeerId(), "len", pack.Items()) 149 continue 150 } 151 // Finalize the request and queue up for processing 152 req.timer.Stop() 153 req.response = pack.(*statePack).states 154 155 finished = append(finished, req) 156 delete(active, pack.PeerId()) 157 158 // Handle dropped peer connections: 159 case p := <-peerDrop: 160 // Skip if no request is currently pending 161 req := active[p.id] 162 if req == nil { 163 continue 164 } 165 // Finalize the request and queue up for processing 166 req.timer.Stop() 167 req.dropped = true 168 169 finished = append(finished, req) 170 delete(active, p.id) 171 172 // Handle timed-out requests: 173 case req := <-timeout: 174 // If the peer is already requesting something else, ignore the stale timeout. 175 // This can happen when the timeout and the delivery happens simultaneously, 176 // causing both pathways to trigger. 177 if active[req.peer.id] != req { 178 continue 179 } 180 // Move the timed out data back into the download queue 181 finished = append(finished, req) 182 delete(active, req.peer.id) 183 184 // Track outgoing state requests: 185 case req := <-d.trackStateReq: 186 // If an active request already exists for this peer, we have a problem. In 187 // theory the trie node schedule must never assign two requests to the same 188 // peer. In practice however, a peer might receive a request, disconnect and 189 // immediately reconnect before the previous times out. In this case the first 190 // request is never honored, alas we must not silently overwrite it, as that 191 // causes valid requests to go missing and sync to get stuck. 192 if old := active[req.peer.id]; old != nil { 193 log.Warn("Busy peer assigned new state fetch", "peer", old.peer.id) 194 195 // Make sure the previous one doesn't get siletly lost 196 old.timer.Stop() 197 old.dropped = true 198 199 finished = append(finished, old) 200 } 201 // Start a timer to notify the sync loop if the peer stalled. 202 req.timer = time.AfterFunc(req.timeout, func() { 203 select { 204 case timeout <- req: 205 case <-s.done: 206 // Prevent leaking of timer goroutines in the unlikely case where a 207 // timer is fired just before exiting runStateSync. 208 } 209 }) 210 active[req.peer.id] = req 211 } 212 } 213 } 214 215 // stateSync schedules requests for downloading a particular state trie defined 216 // by a given state root. 217 type stateSync struct { 218 d *Downloader // Downloader instance to access and manage current peerset 219 220 sched *trie.Sync // State trie sync scheduler defining the tasks 221 keccak hash.Hash // Keccak256 hasher to verify deliveries with 222 tasks map[common.Hash]*stateTask // Set of tasks currently queued for retrieval 223 224 numUncommitted int 225 bytesUncommitted int 226 227 deliver chan *stateReq // Delivery channel multiplexing peer responses 228 cancel chan struct{} // Channel to signal a termination request 229 cancelOnce sync.Once // Ensures cancel only ever gets called once 230 done chan struct{} // Channel to signal termination completion 231 err error // Any error hit during sync (set before completion) 232 } 233 234 // stateTask represents a single trie node download task, containing a set of 235 // peers already attempted retrieval from to detect stalled syncs and abort. 236 type stateTask struct { 237 attempts map[string]struct{} 238 } 239 240 // newStateSync creates a new state trie download scheduler. This method does not 241 // yet start the sync. The user needs to call run to initiate. 242 func newStateSync(d *Downloader, root common.Hash) *stateSync { 243 return &stateSync{ 244 d: d, 245 sched: state.NewStateSync(root, d.stateDB.GlobalTable()), 246 keccak: sha3.NewKeccak256(), 247 tasks: make(map[common.Hash]*stateTask), 248 deliver: make(chan *stateReq), 249 cancel: make(chan struct{}), 250 done: make(chan struct{}), 251 } 252 } 253 254 // run starts the task assignment and response processing loop, blocking until 255 // it finishes, and finally notifying any goroutines waiting for the loop to 256 // finish. 257 func (s *stateSync) run() { 258 s.err = s.loop() 259 close(s.done) 260 } 261 262 // Wait blocks until the sync is done or canceled. 263 func (s *stateSync) Wait() error { 264 <-s.done 265 return s.err 266 } 267 268 // Cancel cancels the sync and waits until it has shut down. 269 func (s *stateSync) Cancel() error { 270 s.cancelOnce.Do(func() { close(s.cancel) }) 271 return s.Wait() 272 } 273 274 // loop is the main event loop of a state trie sync. It it responsible for the 275 // assignment of new tasks to peers (including sending it to them) as well as 276 // for the processing of inbound data. Note, that the loop does not directly 277 // receive data from peers, rather those are buffered up in the downloader and 278 // pushed here async. The reason is to decouple processing from data receipt 279 // and timeouts. 280 func (s *stateSync) loop() (err error) { 281 // Listen for new peer events to assign tasks to them 282 newPeer := make(chan *peerConnection, 1024) 283 peerSub := s.d.peers.SubscribeNewPeers(newPeer) 284 defer peerSub.Unsubscribe() 285 defer func() { 286 cerr := s.commit(true) 287 if err == nil { 288 err = cerr 289 } 290 }() 291 292 // Keep assigning new tasks until the sync completes or aborts 293 for s.sched.Pending() > 0 { 294 if err = s.commit(false); err != nil { 295 return err 296 } 297 s.assignTasks() 298 // Tasks assigned, wait for something to happen 299 select { 300 case <-newPeer: 301 // New peer arrived, try to assign it download tasks 302 303 case <-s.cancel: 304 return errCancelStateFetch 305 306 case <-s.d.cancelCh: 307 return errCancelStateFetch 308 309 case req := <-s.deliver: 310 // Response, disconnect or timeout triggered, drop the peer if stalling 311 log.Trace("Received node data response", "peer", req.peer.id, "count", len(req.response), "dropped", req.dropped, "timeout", !req.dropped && req.timedOut()) 312 if len(req.items) <= 2 && !req.dropped && req.timedOut() { 313 // 2 items are the minimum requested, if even that times out, we've no use of 314 // this peer at the moment. 315 log.Warn("Stalling state sync, dropping peer", "peer", req.peer.id) 316 s.d.dropPeer(req.peer.id) 317 } 318 // Process all the received blobs and check for stale delivery 319 if err = s.process(req); err != nil { 320 log.Warn("Node data write error", "err", err) 321 return err 322 } 323 req.peer.SetNodeDataIdle(len(req.response)) 324 } 325 } 326 return nil 327 } 328 329 func (s *stateSync) commit(force bool) error { 330 if !force && s.bytesUncommitted < ethdb.IdealBatchSize { 331 return nil 332 } 333 start := time.Now() 334 b := s.d.stateDB.GlobalTable().NewBatch() 335 if written, err := s.sched.Commit(b); written == 0 || err != nil { 336 return err 337 } 338 if err := b.Write(); err != nil { 339 return fmt.Errorf("DB write error: %v", err) 340 } 341 s.updateStats(s.numUncommitted, 0, 0, time.Since(start)) 342 s.numUncommitted = 0 343 s.bytesUncommitted = 0 344 return nil 345 } 346 347 // assignTasks attempts to assign new tasks to all idle peers, either from the 348 // batch currently being retried, or fetching new data from the trie sync itself. 349 func (s *stateSync) assignTasks() { 350 ctx, span := trace.StartSpan(context.Background(), "stateSync.assignTasks") 351 defer span.End() 352 // Iterate over all idle peers and try to assign them state fetches 353 peers, _ := s.d.peers.NodeDataIdlePeers() 354 for _, p := range peers { 355 // Assign a batch of fetches proportional to the estimated latency/bandwidth 356 cap := p.NodeDataCapacity(s.d.requestRTT()) 357 req := &stateReq{peer: p, timeout: s.d.requestTTL()} 358 s.fillTasks(cap, req) 359 360 // If the peer was assigned tasks to fetch, send the network request 361 if len(req.items) > 0 { 362 req.peer.log.Trace("Requesting new batch of data", "type", "state", "count", len(req.items)) 363 select { 364 case s.d.trackStateReq <- req: 365 req.peer.FetchNodeData(ctx, req.items) 366 case <-s.cancel: 367 case <-s.d.cancelCh: 368 } 369 } 370 } 371 } 372 373 // fillTasks fills the given request object with a maximum of n state download 374 // tasks to send to the remote peer. 375 func (s *stateSync) fillTasks(n int, req *stateReq) { 376 // Refill available tasks from the scheduler. 377 if len(s.tasks) < n { 378 new := s.sched.Missing(n - len(s.tasks)) 379 for _, hash := range new { 380 s.tasks[hash] = &stateTask{make(map[string]struct{})} 381 } 382 } 383 // Find tasks that haven't been tried with the request's peer. 384 req.items = make([]common.Hash, 0, n) 385 req.tasks = make(map[common.Hash]*stateTask, n) 386 for hash, t := range s.tasks { 387 // Stop when we've gathered enough requests 388 if len(req.items) == n { 389 break 390 } 391 // Skip any requests we've already tried from this peer 392 if _, ok := t.attempts[req.peer.id]; ok { 393 continue 394 } 395 // Assign the request to this peer 396 t.attempts[req.peer.id] = struct{}{} 397 req.items = append(req.items, hash) 398 req.tasks[hash] = t 399 delete(s.tasks, hash) 400 } 401 } 402 403 // process iterates over a batch of delivered state data, injecting each item 404 // into a running state sync, re-queuing any items that were requested but not 405 // delivered. 406 func (s *stateSync) process(req *stateReq) error { 407 // Collect processing stats and update progress if valid data was received 408 duplicate, unexpected := 0, 0 409 410 defer func(start time.Time) { 411 if duplicate > 0 || unexpected > 0 { 412 s.updateStats(0, duplicate, unexpected, time.Since(start)) 413 } 414 }(time.Now()) 415 416 // Iterate over all the delivered data and inject one-by-one into the trie 417 progress := false 418 419 for _, blob := range req.response { 420 prog, hash, err := s.processNodeData(blob) 421 switch err { 422 case nil: 423 s.numUncommitted++ 424 s.bytesUncommitted += len(blob) 425 progress = progress || prog 426 case trie.ErrNotRequested: 427 unexpected++ 428 case trie.ErrAlreadyProcessed: 429 duplicate++ 430 default: 431 return fmt.Errorf("invalid state node %s: %v", hash.TerminalString(), err) 432 } 433 if _, ok := req.tasks[hash]; ok { 434 delete(req.tasks, hash) 435 } 436 } 437 // Put unfulfilled tasks back into the retry queue 438 npeers := s.d.peers.Len() 439 for hash, task := range req.tasks { 440 // If the node did deliver something, missing items may be due to a protocol 441 // limit or a previous timeout + delayed delivery. Both cases should permit 442 // the node to retry the missing items (to avoid single-peer stalls). 443 if len(req.response) > 0 || req.timedOut() { 444 delete(task.attempts, req.peer.id) 445 } 446 // If we've requested the node too many times already, it may be a malicious 447 // sync where nobody has the right data. Abort. 448 if len(task.attempts) >= npeers { 449 return fmt.Errorf("state node %s failed with all peers (%d tries, %d peers)", hash.TerminalString(), len(task.attempts), npeers) 450 } 451 // Missing item, place into the retry queue. 452 s.tasks[hash] = task 453 } 454 return nil 455 } 456 457 // processNodeData tries to inject a trie node data blob delivered from a remote 458 // peer into the state trie, returning whether anything useful was written or any 459 // error occurred. 460 func (s *stateSync) processNodeData(blob []byte) (bool, common.Hash, error) { 461 res := trie.SyncResult{Data: blob} 462 s.keccak.Reset() 463 s.keccak.Write(blob) 464 s.keccak.Sum(res.Hash[:0]) 465 committed, _, err := s.sched.Process([]trie.SyncResult{res}) 466 return committed, res.Hash, err 467 } 468 469 // updateStats bumps the various state sync progress counters and displays a log 470 // message for the user to see. 471 func (s *stateSync) updateStats(written, duplicate, unexpected int, duration time.Duration) { 472 s.d.syncStatsLock.Lock() 473 defer s.d.syncStatsLock.Unlock() 474 475 s.d.syncStatsState.pending = uint64(s.sched.Pending()) 476 s.d.syncStatsState.processed += uint64(written) 477 s.d.syncStatsState.duplicate += uint64(duplicate) 478 s.d.syncStatsState.unexpected += uint64(unexpected) 479 480 if written > 0 || duplicate > 0 || unexpected > 0 { 481 log.Info("Imported new state entries", "count", written, "elapsed", common.PrettyDuration(duration), "processed", s.d.syncStatsState.processed, "pending", s.d.syncStatsState.pending, "retry", len(s.tasks), "duplicate", s.d.syncStatsState.duplicate, "unexpected", s.d.syncStatsState.unexpected) 482 } 483 if written > 0 { 484 rawdb.WriteFastTrieProgress(s.d.stateDB.GlobalTable(), s.d.syncStatsState.processed) 485 } 486 }