package downloader

import (
	"fmt"
	"hash"
	"sync"
	"time"

	"github.com/neatlab/neatio/chain/core/rawdb"
	"github.com/neatlab/neatio/chain/core/state"
	"github.com/neatlab/neatio/chain/log"
	"github.com/neatlab/neatio/chain/trie"
	"github.com/neatlab/neatio/neatdb"
	"github.com/neatlab/neatio/utilities/common"
	"golang.org/x/crypto/sha3"
)

// stateReq represents a batch of state-trie node fetches grouped together into
// a single network request to one peer.
type stateReq struct {
	items    []common.Hash              // Hashes of the state items to download
	tasks    map[common.Hash]*stateTask // Download tasks to track previous attempts
	timeout  time.Duration              // Maximum round trip time for this request to complete
	timer    *time.Timer                // Timer that fires when the RTT timeout expires
	peer     *peerConnection            // Peer that we're requesting from
	response [][]byte                   // Response data of the peer (nil for timeouts)
	dropped  bool                       // Flag whether the peer disconnected before answering
}

// timedOut reports whether this request timed out: no response was ever
// recorded for it.
func (req *stateReq) timedOut() bool {
	return req.response == nil
}

// stateSyncStats is a collection of progress counters reported during a state
// trie sync (displayed in user logs via updateStats).
type stateSyncStats struct {
	processed  uint64 // Number of state entries processed
	duplicate  uint64 // Number of state entries downloaded more than once
	unexpected uint64 // Number of non-requested state entries received
	pending    uint64 // Number of still pending state entries
}

// syncState registers a new state sync for the given root hash with the state
// fetcher goroutine, returning the (possibly already failed) sync object. If
// the downloader is quitting, the sync is marked canceled and completed
// immediately instead.
func (d *Downloader) syncState(root common.Hash) *stateSync {
	s := newStateSync(d, root)
	select {
	case d.stateSyncStart <- s:
	case <-d.quitCh:
		s.err = errCancelStateFetch
		close(s.done)
	}
	return s
}

// stateFetcher is the downloader's long-running loop that accepts state sync
// requests and runs them one at a time until shutdown. State packets arriving
// while no sync is active are drained and discarded.
func (d *Downloader) stateFetcher() {
	for {
		select {
		case s := <-d.stateSyncStart:
			// Run syncs back to back; runStateSync returns the next sync to
			// switch to (or nil when the current one finished).
			for next := s; next != nil; {
				next = d.runStateSync(next)
			}
		case <-d.stateCh:
			// Ignore state responses while no sync is running.
		case <-d.quitCh:
			return
		}
	}
}

// runStateSync runs a state synchronisation until it completes or another root
// hash is requested to be switched over to. It multiplexes peer deliveries,
// peer drops, request timeouts and outgoing request tracking into the sync's
// delivery channel.
func (d *Downloader) runStateSync(s *stateSync) *stateSync {
	var (
		active   = make(map[string]*stateReq) // Currently in-flight requests, keyed by peer id
		finished []*stateReq                  // Completed or failed requests awaiting delivery
		timeout  = make(chan *stateReq)       // Timed out active requests
	)
	defer func() {
		// Cancel active request timers on exit and set the peers idle so
		// they are usable by the next sync.
		for _, req := range active {
			req.timer.Stop()
			req.peer.SetNodeDataIdle(len(req.items))
		}
	}()
	// Run the state sync itself in a separate goroutine.
	go s.run()
	defer s.Cancel()

	// Listen for peer departure events to cancel assigned tasks.
	peerDrop := make(chan *peerConnection, 1024)
	peerSub := s.d.peers.SubscribePeerDrops(peerDrop)
	defer peerSub.Unsubscribe()

	for {
		// Enable sending of the first buffered element if there is one; a nil
		// channel keeps the send case disabled otherwise.
		var (
			deliverReq   *stateReq
			deliverReqCh chan *stateReq
		)
		if len(finished) > 0 {
			deliverReq = finished[0]
			deliverReqCh = s.deliver
		}

		select {
		// The stateSync lifecycle:
		case next := <-d.stateSyncStart:
			return next

		case <-s.done:
			return nil

		// Send the next finished request to the current sync:
		case deliverReqCh <- deliverReq:
			// Shift out the first request, nil-ing the vacated slot so the
			// backing array does not retain it for GC purposes.
			copy(finished, finished[1:])
			finished[len(finished)-1] = nil
			finished = finished[:len(finished)-1]

		// Handle incoming state packs:
		case pack := <-d.stateCh:
			// Discard any data not requested (or that arrived after a timeout
			// already removed the request from the active set).
			req := active[pack.PeerId()]
			if req == nil {
				log.Debug("Unrequested node data", "peer", pack.PeerId(), "len", pack.Items())
				continue
			}
			// Finalize the request and queue it up for processing.
			req.timer.Stop()
			req.response = pack.(*statePack).states

			finished = append(finished, req)
			delete(active, pack.PeerId())

		// Handle dropped peer connections:
		case p := <-peerDrop:
			// Skip if no request is currently pending for this peer.
			req := active[p.id]
			if req == nil {
				continue
			}
			// Finalize the request (response stays nil => timedOut) and queue
			// it up for processing so its tasks are retried.
			req.timer.Stop()
			req.dropped = true

			finished = append(finished, req)
			delete(active, p.id)

		// Handle timed-out requests:
		case req := <-timeout:
			// If the peer is already requesting something else, ignore the
			// stale timeout. This can happen when the timeout and the delivery
			// occur simultaneously, making both pathways fire.
			if active[req.peer.id] != req {
				continue
			}
			// Move the timed out request into the finished queue so its tasks
			// are returned for retry.
			finished = append(finished, req)
			delete(active, req.peer.id)

		// Track outgoing state requests:
		case req := <-d.trackStateReq:
			// If an active request already exists for this peer, the scheduler
			// assigned it a second fetch before the first resolved (e.g. the
			// peer dropped and reconnected before the old request timed out).
			// The old request must not be silently overwritten, or its tasks
			// would be lost and sync could stall; fail it explicitly.
			if old := active[req.peer.id]; old != nil {
				log.Warn("Busy peer assigned new state fetch", "peer", old.peer.id)

				old.timer.Stop()
				old.dropped = true

				finished = append(finished, old)
			}
			// Start a timer to notify this loop if the peer stalls; the s.done
			// case prevents the timer goroutine leaking if the loop has exited.
			req.timer = time.AfterFunc(req.timeout, func() {
				select {
				case timeout <- req:
				case <-s.done:

				}
			})
			active[req.peer.id] = req
		}
	}
}

// stateSync schedules requests for downloading a particular state trie defined
// by a given state root.
type stateSync struct {
	d *Downloader // Downloader instance to access the peer set and config

	sched  *trie.Sync                 // State trie sync scheduler defining the tasks
	keccak hash.Hash                  // Keccak256 hasher used to derive node hashes from deliveries
	tasks  map[common.Hash]*stateTask // Set of tasks currently queued for retrieval

	numUncommitted   int // Number of trie nodes buffered but not yet flushed to disk
	bytesUncommitted int // Byte size of the not-yet-flushed trie nodes

	deliver    chan *stateReq // Delivery channel multiplexing peer responses
	cancel     chan struct{}  // Channel to signal a termination request
	cancelOnce sync.Once      // Ensures cancel only ever gets closed once
	done       chan struct{}  // Channel to signal termination completion
	err        error          // Any error hit during sync (set before done is closed)
}

// stateTask represents a single trie-node download task, tracking the set of
// peers already tried so stalled nodes can be detected and the sync aborted.
type stateTask struct {
	attempts map[string]struct{}
}

// newStateSync creates a new state trie download scheduler for the given root.
// It does not start the sync; the caller must invoke run (typically via
// Downloader.runStateSync).
func newStateSync(d *Downloader, root common.Hash) *stateSync {
	return &stateSync{
		d:       d,
		sched:   state.NewStateSync(root, d.stateDB),
		keccak:  sha3.NewLegacyKeccak256(),
		tasks:   make(map[common.Hash]*stateTask),
		deliver: make(chan *stateReq),
		cancel:  make(chan struct{}),
		done:    make(chan struct{}),
	}
}

// run executes the main sync loop, records its result and signals completion
// to any goroutines blocked in Wait.
func (s *stateSync) run() {
	s.err = s.loop()
	close(s.done)
}

// Wait blocks until the sync is done or canceled, returning its final error.
func (s *stateSync) Wait() error {
	<-s.done
	return s.err
}

// Cancel signals termination to the sync loop and waits until it has shut
// down. Safe to call multiple times.
func (s *stateSync) Cancel() error {
	s.cancelOnce.Do(func() { close(s.cancel) })
	return s.Wait()
}

// loop is the main event loop of a state trie sync. It assigns new tasks to
// idle peers, processes inbound deliveries and periodically commits buffered
// nodes, until the scheduler reports nothing pending.
func (s *stateSync) loop() error {
	// Listen for new peer arrivals so tasks can be assigned to them too.
	newPeer := make(chan *peerConnection, 1024)
	peerSub := s.d.peers.SubscribeNewPeers(newPeer)
	defer peerSub.Unsubscribe()

	for s.sched.Pending() > 0 {
		// Flush buffered nodes once the batch grows large enough.
		if err := s.commit(false); err != nil {
			return err
		}
		s.assignTasks()
		// Tasks assigned, wait for something to happen.
		select {
		case <-newPeer:
			// New peer arrived; loop around to try assigning it tasks.

		case <-s.cancel:
			return errCancelStateFetch

		case <-s.d.cancelCh:
			return errCancelStateFetch

		case req := <-s.deliver:
			// Response, disconnect or timeout triggered; drop the peer if it
			// stalled even on a tiny (<= 2 item) request without dropping.
			log.Trace("Received node data response", "peer", req.peer.id, "count", len(req.response), "dropped", req.dropped, "timeout", !req.dropped && req.timedOut())
			if len(req.items) <= 2 && !req.dropped && req.timedOut() {

				log.Warn("Stalling state sync, dropping peer", "peer", req.peer.id)
				s.d.dropPeer(req.peer.id)
			}
			// Process all the received blobs and re-queue unfulfilled tasks.
			if err := s.process(req); err != nil {
				log.Warn("Node data write error", "err", err)
				return err
			}
			req.peer.SetNodeDataIdle(len(req.response))
		}
	}
	// Force a final flush of anything still buffered.
	return s.commit(true)
}

// commit flushes trie nodes buffered by the scheduler to the database, but
// only once enough bytes accumulated (or when force is set, e.g. at the end
// of the sync). Progress stats are updated on every successful flush.
func (s *stateSync) commit(force bool) error {
	if !force && s.bytesUncommitted < neatdb.IdealBatchSize {
		return nil
	}
	start := time.Now()
	b := s.d.stateDB.NewBatch()
	s.sched.Commit(b)
	if err := b.Write(); err != nil {
		return fmt.Errorf("DB write error: %v", err)
	}
	s.updateStats(s.numUncommitted, 0, 0, time.Since(start))
	s.numUncommitted = 0
	s.bytesUncommitted = 0
	return nil
}

// assignTasks hands out state fetch requests to all currently idle peers,
// sizing each batch by the peer's estimated node-data capacity for the
// current target round-trip time.
func (s *stateSync) assignTasks() {
	// Iterate over all idle peers and try to assign them state fetches.
	peers, _ := s.d.peers.NodeDataIdlePeers()
	for _, p := range peers {
		// Size the batch proportionally to the peer's estimated throughput.
		cap := p.NodeDataCapacity(s.d.requestRTT())
		req := &stateReq{peer: p, timeout: s.d.requestTTL()}
		s.fillTasks(cap, req)

		// If the peer was assigned tasks to fetch, send the network request.
		if len(req.items) > 0 {
			req.peer.log.Trace("Requesting new batch of data", "type", "state", "count", len(req.items))
			select {
			case s.d.trackStateReq <- req:
				req.peer.FetchNodeData(req.items)
			case <-s.cancel:
			case <-s.d.cancelCh:
			}
		}
	}
}

// fillTasks fills the given request object with up to n state download tasks,
// pulling fresh hashes from the trie scheduler when the retry queue runs low
// and skipping tasks this peer has already been asked for.
func (s *stateSync) fillTasks(n int, req *stateReq) {
	// Refill the retry queue from the scheduler if it can't cover the request.
	if len(s.tasks) < n {
		new := s.sched.Missing(n - len(s.tasks))
		for _, hash := range new {
			s.tasks[hash] = &stateTask{make(map[string]struct{})}
		}
	}
	// Find tasks that haven't been tried with the request's peer yet.
	req.items = make([]common.Hash, 0, n)
	req.tasks = make(map[common.Hash]*stateTask, n)
	for hash, t := range s.tasks {
		// Stop once the batch is full.
		if len(req.items) == n {
			break
		}
		// Skip any tasks already attempted with this peer.
		if _, ok := t.attempts[req.peer.id]; ok {
			continue
		}
		// Assign the task to this peer and move it out of the retry queue; it
		// is re-added by process() if the delivery doesn't cover it.
		t.attempts[req.peer.id] = struct{}{}
		req.items = append(req.items, hash)
		req.tasks[hash] = t
		delete(s.tasks, hash)
	}
}

// process iterates over a batch of delivered state data, injecting each blob
// into the trie scheduler and returning any tasks that were requested but not
// delivered to the retry queue. It errors on invalid nodes or when a node has
// been attempted with every connected peer.
func (s *stateSync) process(req *stateReq) error {
	// Collect processing stats; report them only if something noteworthy
	// (duplicate or unexpected data) happened.
	duplicate, unexpected := 0, 0

	defer func(start time.Time) {
		if duplicate > 0 || unexpected > 0 {
			s.updateStats(0, duplicate, unexpected, time.Since(start))
		}
	}(time.Now())

	// NOTE(review): progress is accumulated below but never consumed in this
	// function — it appears to be vestigial.
	progress := false

	// Inject the delivered blobs one by one into the trie scheduler.
	for _, blob := range req.response {
		prog, hash, err := s.processNodeData(blob)
		switch err {
		case nil:
			s.numUncommitted++
			s.bytesUncommitted += len(blob)
			progress = progress || prog
		case trie.ErrNotRequested:
			unexpected++
		case trie.ErrAlreadyProcessed:
			duplicate++
		default:
			return fmt.Errorf("invalid state node %s: %v", hash.TerminalString(), err)
		}
		// Mark the task fulfilled so it isn't re-queued below.
		if _, ok := req.tasks[hash]; ok {
			delete(req.tasks, hash)
		}
	}
	// Put unfulfilled tasks back into the retry queue.
	npeers := s.d.peers.Len()
	for hash, task := range req.tasks {
		// If the peer delivered something (partial response) or the request
		// timed out entirely, clear this peer's attempt record so the item may
		// be retried with it later instead of stalling on a shrinking peer set.
		if len(req.response) > 0 || req.timedOut() {

			delete(task.attempts, req.peer.id)
		}
		// If the node has been requested from every known peer without
		// success, give up — nobody appears to have the data.
		if len(task.attempts) >= npeers {
			return fmt.Errorf("state node %s failed with all peers (%d tries, %d peers)", hash.TerminalString(), len(task.attempts), npeers)
		}
		// Missing item: place it back into the retry queue.
		s.tasks[hash] = task
	}
	return nil
}

// processNodeData hashes a delivered blob with Keccak256 and feeds it to the
// trie scheduler, returning whether the node was committed, its hash, and any
// scheduler error (e.g. trie.ErrNotRequested / trie.ErrAlreadyProcessed).
func (s *stateSync) processNodeData(blob []byte) (bool, common.Hash, error) {
	res := trie.SyncResult{Data: blob}
	s.keccak.Reset()
	s.keccak.Write(blob)
	s.keccak.Sum(res.Hash[:0])
	committed, _, err := s.sched.Process([]trie.SyncResult{res})
	return committed, res.Hash, err
}

// updateStats bumps the various state sync progress counters under the stats
// lock, logs a user-visible progress line when anything changed, and persists
// the fast-trie progress marker after successful writes.
func (s *stateSync) updateStats(written, duplicate, unexpected int, duration time.Duration) {
	s.d.syncStatsLock.Lock()
	defer s.d.syncStatsLock.Unlock()

	s.d.syncStatsState.pending = uint64(s.sched.Pending())
	s.d.syncStatsState.processed += uint64(written)
	s.d.syncStatsState.duplicate += uint64(duplicate)
	s.d.syncStatsState.unexpected += uint64(unexpected)

	if written > 0 || duplicate > 0 || unexpected > 0 {
		log.Info("Imported new state entries", "count", written, "elapsed", common.PrettyDuration(duration), "processed", s.d.syncStatsState.processed, "pending", s.d.syncStatsState.pending, "retry", len(s.tasks), "duplicate", s.d.syncStatsState.duplicate, "unexpected", s.d.syncStatsState.unexpected)
	}
	if written > 0 {
		rawdb.WriteFastTrieProgress(s.d.stateDB, s.d.syncStatsState.processed)
	}
}