github.com/unicornultrafoundation/go-u2u@v1.0.0-rc1.0.20240205080301-e74a83d3fadc/gossip/protocols/snap/snapstream/snapleecher/statesync.go (about) 1 // Copyright 2017 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package snapleecher 18 19 import ( 20 "sync" 21 "time" 22 23 "github.com/unicornultrafoundation/go-u2u/common" 24 "github.com/unicornultrafoundation/go-u2u/core/state" 25 "github.com/unicornultrafoundation/go-u2u/crypto" 26 "github.com/unicornultrafoundation/go-u2u/log" 27 "github.com/unicornultrafoundation/go-u2u/trie" 28 "golang.org/x/crypto/sha3" 29 ) 30 31 // stateReq represents a batch of state fetch requests grouped together into 32 // a single data retrieval network packet. 33 type stateReq struct { 34 nItems uint16 // Number of items requested for download (max is 384, so uint16 is sufficient) 35 trieTasks map[common.Hash]*trieTask // Trie node download tasks to track previous attempts 36 codeTasks map[common.Hash]*codeTask // Byte code download tasks to track previous attempts 37 timeout time.Duration // Maximum round trip time for this to complete 38 timer *time.Timer // Timer to fire when the RTT timeout expires 39 peer *peerConnection // Peer that we're requesting from 40 delivered time.Time // Time when the packet was delivered (independent when we process it) 41 response [][]byte // Response data of the peer (nil for timeouts) 42 dropped bool // Flag whether the peer dropped off early 43 } 44 45 // timedOut returns if this request timed out. 46 func (req *stateReq) timedOut() bool { 47 return req.response == nil 48 } 49 50 // stateSyncStats is a collection of progress stats to report during a state trie 51 // sync to RPC requests as well as to display in user logs. 52 type stateSyncStats struct { 53 processed uint64 // Number of state entries processed 54 duplicate uint64 // Number of state entries downloaded twice 55 unexpected uint64 // Number of non-requested state entries received 56 pending uint64 // Number of still pending state entries 57 } 58 59 // SyncState starts downloading state with the given root hash. 60 func (d *Leecher) SyncState(root common.Hash) *stateSync { 61 // Create the state sync 62 s := newStateSync(d, root) 63 select { 64 case d.stateSyncStart <- s: 65 // If we tell the statesync to restart with a new root, we also need 66 // to wait for it to actually also start -- when old requests have timed 67 // out or been delivered 68 <-s.started 69 case <-d.quitCh: 70 s.err = errCancelStateFetch 71 close(s.done) 72 } 73 return s 74 } 75 76 // stateFetcher manages the active state sync and accepts requests 77 // on its behalf. 78 func (d *Leecher) stateFetcher() { 79 for { 80 select { 81 case s := <-d.stateSyncStart: 82 for next := s; next != nil; { 83 next = d.runStateSync(next) 84 } 85 case <-d.stateCh: 86 // Ignore state responses while no sync is running. 87 case <-d.quitCh: 88 return 89 } 90 } 91 } 92 93 // runStateSync runs a state synchronisation until it completes or another root 94 // hash is requested to be switched over to. 95 func (d *Leecher) runStateSync(s *stateSync) *stateSync { 96 var ( 97 active = make(map[string]*stateReq) // Currently in-flight requests 98 finished []*stateReq // Completed or failed requests 99 timeout = make(chan *stateReq) // Timed out active requests 100 ) 101 log.Trace("State sync starting", "root", s.root) 102 103 defer func() { 104 // Cancel active request timers on exit. Also set peers to idle so they're 105 // available for the next sync. 106 for _, req := range active { 107 req.timer.Stop() 108 req.peer.SetNodeDataIdle(int(req.nItems), time.Now()) 109 } 110 }() 111 go s.run() 112 defer s.Cancel() 113 114 // Listen for peer departure events to cancel assigned tasks 115 peerDrop := make(chan *peerConnection, 1024) 116 peerSub := s.d.peers.SubscribePeerDrops(peerDrop) 117 defer peerSub.Unsubscribe() 118 119 for { 120 // Enable sending of the first buffered element if there is one. 121 var ( 122 deliverReq *stateReq 123 deliverReqCh chan *stateReq 124 ) 125 if len(finished) > 0 { 126 deliverReq = finished[0] 127 deliverReqCh = s.deliver 128 } 129 130 select { 131 // The stateSync lifecycle: 132 case next := <-d.stateSyncStart: 133 d.spindownStateSync(active, finished, timeout, peerDrop) 134 return next 135 136 case <-s.done: 137 d.spindownStateSync(active, finished, timeout, peerDrop) 138 return nil 139 140 // Send the next finished request to the current sync: 141 case deliverReqCh <- deliverReq: 142 // Shift out the first request, but also set the emptied slot to nil for GC 143 copy(finished, finished[1:]) 144 finished[len(finished)-1] = nil 145 finished = finished[:len(finished)-1] 146 147 // Handle incoming state packs: 148 case pack := <-d.stateCh: 149 // Discard any data not requested (or previously timed out) 150 req := active[pack.PeerId()] 151 if req == nil { 152 log.Debug("Unrequested node data", "peer", pack.PeerId(), "len", pack.Items()) 153 continue 154 } 155 // Finalize the request and queue up for processing 156 req.timer.Stop() 157 req.response = pack.(*statePack).states 158 req.delivered = time.Now() 159 160 finished = append(finished, req) 161 delete(active, pack.PeerId()) 162 163 // Handle dropped peer connections: 164 case p := <-peerDrop: 165 // Skip if no request is currently pending 166 req := active[p.id] 167 if req == nil { 168 continue 169 } 170 // Finalize the request and queue up for processing 171 req.timer.Stop() 172 req.dropped = true 173 req.delivered = time.Now() 174 175 finished = append(finished, req) 176 delete(active, p.id) 177 178 // Handle timed-out requests: 179 case req := <-timeout: 180 // If the peer is already requesting something else, ignore the stale timeout. 181 // This can happen when the timeout and the delivery happens simultaneously, 182 // causing both pathways to trigger. 183 if active[req.peer.id] != req { 184 continue 185 } 186 req.delivered = time.Now() 187 // Move the timed out data back into the download queue 188 finished = append(finished, req) 189 delete(active, req.peer.id) 190 191 // Track outgoing state requests: 192 case req := <-d.trackStateReq: 193 // If an active request already exists for this peer, we have a problem. In 194 // theory the trie node schedule must never assign two requests to the same 195 // peer. In practice however, a peer might receive a request, disconnect and 196 // immediately reconnect before the previous times out. In this case the first 197 // request is never honored, alas we must not silently overwrite it, as that 198 // causes valid requests to go missing and sync to get stuck. 199 if old := active[req.peer.id]; old != nil { 200 log.Warn("Busy peer assigned new state fetch", "peer", old.peer.id) 201 // Move the previous request to the finished set 202 old.timer.Stop() 203 old.dropped = true 204 old.delivered = time.Now() 205 finished = append(finished, old) 206 } 207 // Start a timer to notify the sync loop if the peer stalled. 208 req.timer = time.AfterFunc(req.timeout, func() { 209 timeout <- req 210 }) 211 active[req.peer.id] = req 212 } 213 } 214 } 215 216 // spindownStateSync 'drains' the outstanding requests; some will be delivered and other 217 // will time out. This is to ensure that when the next stateSync starts working, all peers 218 // are marked as idle and de facto _are_ idle. 219 func (d *Leecher) spindownStateSync(active map[string]*stateReq, finished []*stateReq, timeout chan *stateReq, peerDrop chan *peerConnection) { 220 log.Trace("State sync spinning down", "active", len(active), "finished", len(finished)) 221 for len(active) > 0 { 222 var ( 223 req *stateReq 224 reason string 225 ) 226 select { 227 // Handle (drop) incoming state packs: 228 case pack := <-d.stateCh: 229 req = active[pack.PeerId()] 230 reason = "delivered" 231 // Handle dropped peer connections: 232 case p := <-peerDrop: 233 req = active[p.id] 234 reason = "peerdrop" 235 // Handle timed-out requests: 236 case req = <-timeout: 237 reason = "timeout" 238 } 239 if req == nil { 240 continue 241 } 242 req.peer.log.Trace("State peer marked idle (spindown)", "req.items", int(req.nItems), "reason", reason) 243 req.timer.Stop() 244 delete(active, req.peer.id) 245 req.peer.SetNodeDataIdle(int(req.nItems), time.Now()) 246 } 247 // The 'finished' set contains deliveries that we were going to pass to processing. 248 // Those are now moot, but we still need to set those peers as idle, which would 249 // otherwise have been done after processing 250 for _, req := range finished { 251 req.peer.SetNodeDataIdle(int(req.nItems), time.Now()) 252 } 253 } 254 255 // stateSync schedules requests for downloading a particular state trie defined 256 // by a given state root. 257 type stateSync struct { 258 d *Leecher // Downloader instance to access and manage current peerset 259 260 root common.Hash // State root currently being synced 261 sched *trie.Sync // State trie sync scheduler defining the tasks 262 keccak crypto.KeccakState // Keccak256 hasher to verify deliveries with 263 264 trieTasks map[common.Hash]*trieTask // Set of trie node tasks currently queued for retrieval 265 codeTasks map[common.Hash]*codeTask // Set of byte code tasks currently queued for retrieval 266 267 numUncommitted int 268 bytesUncommitted int 269 270 started chan struct{} // Started is signalled once the sync loop starts 271 272 deliver chan *stateReq // Delivery channel multiplexing peer responses 273 cancel chan struct{} // Channel to signal a termination request 274 cancelOnce sync.Once // Ensures cancel only ever gets called once 275 done chan struct{} // Channel to signal termination completion 276 err error // Any error hit during sync (set before completion) 277 } 278 279 // trieTask represents a single trie node download task, containing a set of 280 // peers already attempted retrieval from to detect stalled syncs and abort. 281 type trieTask struct { 282 path [][]byte 283 attempts map[string]struct{} 284 } 285 286 // codeTask represents a single byte code download task, containing a set of 287 // peers already attempted retrieval from to detect stalled syncs and abort. 288 type codeTask struct { 289 attempts map[string]struct{} 290 } 291 292 // newStateSync creates a new state trie download scheduler. This method does not 293 // yet start the sync. The user needs to call run to initiate. 294 func newStateSync(d *Leecher, root common.Hash) *stateSync { 295 return &stateSync{ 296 d: d, 297 root: root, 298 sched: state.NewStateSync(root, d.stateDB, d.stateBloom, nil), 299 keccak: sha3.NewLegacyKeccak256().(crypto.KeccakState), 300 trieTasks: make(map[common.Hash]*trieTask), 301 codeTasks: make(map[common.Hash]*codeTask), 302 deliver: make(chan *stateReq), 303 cancel: make(chan struct{}), 304 done: make(chan struct{}), 305 started: make(chan struct{}), 306 } 307 } 308 309 // run starts the task assignment and response processing loop, blocking until 310 // it finishes, and finally notifying any goroutines waiting for the loop to 311 // finish. 312 func (s *stateSync) run() { 313 close(s.started) 314 s.err = s.d.SnapSyncer.Sync(s.root, s.cancel) 315 close(s.done) 316 } 317 318 // Wait blocks until the sync is done or canceled. 319 func (s *stateSync) Wait() error { 320 <-s.done 321 return s.err 322 } 323 324 // Cancel cancels the sync and waits until it has shut down. 325 func (s *stateSync) Cancel() error { 326 s.cancelOnce.Do(func() { 327 close(s.cancel) 328 }) 329 return s.Wait() 330 }