github.com/linapex/ethereum-go-chinese@v0.0.0-20190316121929-f8b7a73c3fa1/eth/downloader/statesync.go (about) 1 2 //<developer> 3 // <name>linapex 曹一峰</name> 4 // <email>linapex@163.com</email> 5 // <wx>superexc</wx> 6 // <qqgroup>128148617</qqgroup> 7 // <url>https://jsq.ink</url> 8 // <role>pku engineer</role> 9 // <date>2019-03-16 19:16:37</date> 10 //</624450088390889472> 11 12 13 package downloader 14 15 import ( 16 "fmt" 17 "hash" 18 "sync" 19 "time" 20 21 "github.com/ethereum/go-ethereum/common" 22 "github.com/ethereum/go-ethereum/core/rawdb" 23 "github.com/ethereum/go-ethereum/core/state" 24 "github.com/ethereum/go-ethereum/ethdb" 25 "github.com/ethereum/go-ethereum/log" 26 "github.com/ethereum/go-ethereum/trie" 27 "golang.org/x/crypto/sha3" 28 ) 29 30 //statereq表示一批状态获取请求,分组到 31 //单个数据检索网络包。 32 type stateReq struct { 33 items []common.Hash //要下载的状态项的哈希 34 tasks map[common.Hash]*stateTask //下载任务以跟踪以前的尝试 35 timeout time.Duration //Maximum round trip time for this to complete 36 timer *time.Timer //RTT超时过期时要触发的计时器 37 peer *peerConnection //我们请求的同伴 38 response [][]byte //对等机的响应数据(超时为零) 39 dropped bool //标记对等机是否提前退出 40 } 41 42 //如果此请求超时,则返回timed out。 43 func (req *stateReq) timedOut() bool { 44 return req.response == nil 45 } 46 47 //StateSyncStats是状态检索期间要报告的进度统计信息的集合。 48 //同步到RPC请求并显示在用户日志中。 49 type stateSyncStats struct { 50 processed uint64 //处理的状态条目数 51 duplicate uint64 //两次下载的状态条目数 52 unexpected uint64 //接收到的非请求状态条目数 53 pending uint64 //仍挂起状态条目数 54 } 55 56 //SyncState开始使用给定的根哈希下载状态。 57 func (d *Downloader) syncState(root common.Hash) *stateSync { 58 s := newStateSync(d, root) 59 select { 60 case d.stateSyncStart <- s: 61 case <-d.quitCh: 62 s.err = errCancelStateFetch 63 close(s.done) 64 } 65 return s 66 } 67 68 //stateFetcher manages the active state sync and accepts requests 69 //代表它。 70 func (d *Downloader) stateFetcher() { 71 for { 72 select { 73 case s := <-d.stateSyncStart: 74 for next := s; next != nil; { 75 next = d.runStateSync(next) 76 } 77 case <-d.stateCh: 78 //不运行同步时忽略状态响应。 79 case <-d.quitCh: 80 return 81 } 82 } 83 } 84 85 //runStateSync runs a state synchronisation until it completes or another root 86 //请求将哈希切换到。 87 func (d *Downloader) runStateSync(s *stateSync) *stateSync { 88 var ( 89 active = make(map[string]*stateReq) //当前飞行请求 90 finished []*stateReq //已完成或失败的请求 91 timeout = make(chan *stateReq) //活动请求超时 92 ) 93 defer func() { 94 //退出时取消活动请求计时器。还可以将对等机设置为空闲,以便 95 //可用于下一次同步。 96 for _, req := range active { 97 req.timer.Stop() 98 req.peer.SetNodeDataIdle(len(req.items)) 99 } 100 }() 101 //运行状态同步。 102 go s.run() 103 defer s.Cancel() 104 105 //倾听同伴离开事件以取消分配的任务 106 peerDrop := make(chan *peerConnection, 1024) 107 peerSub := s.d.peers.SubscribePeerDrops(peerDrop) 108 defer peerSub.Unsubscribe() 109 110 for { 111 //如果有第一个缓冲元素,则启用发送。 112 var ( 113 deliverReq *stateReq 114 deliverReqCh chan *stateReq 115 ) 116 if len(finished) > 0 { 117 deliverReq = finished[0] 118 deliverReqCh = s.deliver 119 } 120 121 select { 122 //The stateSync lifecycle: 123 case next := <-d.stateSyncStart: 124 return next 125 126 case <-s.done: 127 return nil 128 129 //将下一个完成的请求发送到当前同步: 130 case deliverReqCh <- deliverReq: 131 //移出第一个请求,但也为GC将空槽设置为零 132 copy(finished, finished[1:]) 133 finished[len(finished)-1] = nil 134 finished = finished[:len(finished)-1] 135 136 //处理传入状态包: 137 case pack := <-d.stateCh: 138 //放弃任何未请求的数据(或以前超时的数据) 139 req := active[pack.PeerId()] 140 if req == nil { 141 log.Debug("Unrequested node data", "peer", pack.PeerId(), "len", pack.Items()) 142 continue 143 } 144 //完成请求并排队等待处理 145 req.timer.Stop() 146 req.response = pack.(*statePack).states 147 148 finished = append(finished, req) 149 delete(active, pack.PeerId()) 150 151 //处理掉的对等连接: 152 case p := <-peerDrop: 153 //Skip if no request is currently pending 154 req := active[p.id] 155 if req == nil { 156 continue 157 } 158 //完成请求并排队等待处理 159 req.timer.Stop() 160 req.dropped = true 161 162 finished = append(finished, req) 163 delete(active, p.id) 164 165 //处理超时请求: 166 case req := <-timeout: 167 //如果对等机已经在请求其他东西,请忽略过时的超时。 168 //当超时和传递同时发生时,就会发生这种情况, 169 //导致两种途径触发。 170 if active[req.peer.id] != req { 171 continue 172 } 173 //将超时数据移回下载队列 174 finished = append(finished, req) 175 delete(active, req.peer.id) 176 177 //Track outgoing state requests: 178 case req := <-d.trackStateReq: 179 //如果此对等机已经存在活动请求,则说明存在问题。在 180 //理论上,trie节点调度决不能将两个请求分配给同一个 181 //同龄人。然而,在实践中,对等端可能会收到一个请求,断开连接并 182 //在前一次超时前立即重新连接。在这种情况下,第一个 183 //请求永远不会得到满足,唉,我们不能悄悄地改写它,就像这样。 184 //导致有效的请求丢失,并同步卡住。 185 if old := active[req.peer.id]; old != nil { 186 log.Warn("Busy peer assigned new state fetch", "peer", old.peer.id) 187 188 //确保前一个不会被误丢 189 old.timer.Stop() 190 old.dropped = true 191 192 finished = append(finished, old) 193 } 194 //Start a timer to notify the sync loop if the peer stalled. 195 req.timer = time.AfterFunc(req.timeout, func() { 196 select { 197 case timeout <- req: 198 case <-s.done: 199 //Prevent leaking of timer goroutines in the unlikely case where a 200 //在退出runstatesync之前,计时器将被激发。 201 } 202 }) 203 active[req.peer.id] = req 204 } 205 } 206 } 207 208 //stateSync schedules requests for downloading a particular state trie defined 209 //通过给定的状态根。 210 type stateSync struct { 211 d *Downloader //用于访问和管理当前对等集的下载程序实例 212 213 sched *trie.Sync //State trie sync scheduler defining the tasks 214 keccak hash.Hash //KECCAK256哈希验证交付 215 tasks map[common.Hash]*stateTask //当前排队等待检索的任务集 216 217 numUncommitted int 218 bytesUncommitted int 219 220 deliver chan *stateReq //传递通道多路复用对等响应 221 cancel chan struct{} //发送终止请求信号的通道 222 cancelOnce sync.Once //确保Cancel只被调用一次 223 done chan struct{} //通道到信号终止完成 224 err error //同步期间发生的任何错误(在完成前设置) 225 } 226 227 //statetask表示单个trie节点下载任务,包含一组 228 //peers already attempted retrieval from to detect stalled syncs and abort. 229 type stateTask struct { 230 attempts map[string]struct{} 231 } 232 233 //newstatesync创建新的状态trie下载计划程序。此方法不 234 //开始同步。用户需要调用run来启动。 235 func newStateSync(d *Downloader, root common.Hash) *stateSync { 236 return &stateSync{ 237 d: d, 238 sched: state.NewStateSync(root, d.stateDB), 239 keccak: sha3.NewLegacyKeccak256(), 240 tasks: make(map[common.Hash]*stateTask), 241 deliver: make(chan *stateReq), 242 cancel: make(chan struct{}), 243 done: make(chan struct{}), 244 } 245 } 246 247 //run starts the task assignment and response processing loop, blocking until 248 //它结束,并最终通知等待循环的任何Goroutines 249 //完成。 250 func (s *stateSync) run() { 251 s.err = s.loop() 252 close(s.done) 253 } 254 255 //Wait blocks until the sync is done or canceled. 256 func (s *stateSync) Wait() error { 257 <-s.done 258 return s.err 259 } 260 261 //取消取消同步并等待其关闭。 262 func (s *stateSync) Cancel() error { 263 s.cancelOnce.Do(func() { close(s.cancel) }) 264 return s.Wait() 265 } 266 267 //循环是状态trie-sync的主事件循环。它负责 268 //assignment of new tasks to peers (including sending it to them) as well as 269 //用于处理入站数据。注意,循环不直接 270 //从对等端接收数据,而不是在下载程序中缓冲这些数据, 271 //按这里异步。原因是将处理与数据接收分离 272 //超时。 273 func (s *stateSync) loop() (err error) { 274 //侦听新的对等事件以将任务分配给它们 275 newPeer := make(chan *peerConnection, 1024) 276 peerSub := s.d.peers.SubscribeNewPeers(newPeer) 277 defer peerSub.Unsubscribe() 278 defer func() { 279 cerr := s.commit(true) 280 if err == nil { 281 err = cerr 282 } 283 }() 284 285 //继续分配新任务,直到同步完成或中止 286 for s.sched.Pending() > 0 { 287 if err = s.commit(false); err != nil { 288 return err 289 } 290 s.assignTasks() 291 //分配的任务,等待发生什么 292 select { 293 case <-newPeer: 294 //新对等机已到达,请尝试分配它的下载任务 295 296 case <-s.cancel: 297 return errCancelStateFetch 298 299 case <-s.d.cancelCh: 300 return errCancelStateFetch 301 302 case req := <-s.deliver: 303 //响应、断开连接或超时触发,如果停止,则丢弃对等机 304 log.Trace("Received node data response", "peer", req.peer.id, "count", len(req.response), "dropped", req.dropped, "timeout", !req.dropped && req.timedOut()) 305 if len(req.items) <= 2 && !req.dropped && req.timedOut() { 306 //2项是最低要求,即使超时,我们也没有用 307 //现在这个人。 308 log.Warn("Stalling state sync, dropping peer", "peer", req.peer.id) 309 s.d.dropPeer(req.peer.id) 310 } 311 //处理所有接收到的Blob并检查是否存在过时的传递 312 delivered, err := s.process(req) 313 if err != nil { 314 log.Warn("Node data write error", "err", err) 315 return err 316 } 317 req.peer.SetNodeDataIdle(delivered) 318 } 319 } 320 return nil 321 } 322 323 func (s *stateSync) commit(force bool) error { 324 if !force && s.bytesUncommitted < ethdb.IdealBatchSize { 325 return nil 326 } 327 start := time.Now() 328 b := s.d.stateDB.NewBatch() 329 if written, err := s.sched.Commit(b); written == 0 || err != nil { 330 return err 331 } 332 if err := b.Write(); err != nil { 333 return fmt.Errorf("DB write error: %v", err) 334 } 335 s.updateStats(s.numUncommitted, 0, 0, time.Since(start)) 336 s.numUncommitted = 0 337 s.bytesUncommitted = 0 338 return nil 339 } 340 341 //assign tasks尝试将新任务分配给所有空闲对等端,或者从 342 //当前正在重试批处理,或者从TIE同步本身获取新数据。 343 func (s *stateSync) assignTasks() { 344 //遍历所有空闲对等点,并尝试为其分配状态获取 345 peers, _ := s.d.peers.NodeDataIdlePeers() 346 for _, p := range peers { 347 //分配一批与估计的延迟/带宽成比例的获取 348 cap := p.NodeDataCapacity(s.d.requestRTT()) 349 req := &stateReq{peer: p, timeout: s.d.requestTTL()} 350 s.fillTasks(cap, req) 351 352 //如果为对等机分配了要获取的任务,则发送网络请求 353 if len(req.items) > 0 { 354 req.peer.log.Trace("Requesting new batch of data", "type", "state", "count", len(req.items)) 355 select { 356 case s.d.trackStateReq <- req: 357 req.peer.FetchNodeData(req.items) 358 case <-s.cancel: 359 case <-s.d.cancelCh: 360 } 361 } 362 } 363 } 364 365 //filltasks用最多n个状态下载来填充给定的请求对象 366 //要发送到远程对等机的任务。 367 func (s *stateSync) fillTasks(n int, req *stateReq) { 368 //从调度程序重新填充可用任务。 369 if len(s.tasks) < n { 370 new := s.sched.Missing(n - len(s.tasks)) 371 for _, hash := range new { 372 s.tasks[hash] = &stateTask{make(map[string]struct{})} 373 } 374 } 375 //查找尚未使用请求的对等方尝试的任务。 376 req.items = make([]common.Hash, 0, n) 377 req.tasks = make(map[common.Hash]*stateTask, n) 378 for hash, t := range s.tasks { 379 //当我们收集到足够多的请求时停止 380 if len(req.items) == n { 381 break 382 } 383 //跳过我们已经尝试过的来自此对等方的任何请求 384 if _, ok := t.attempts[req.peer.id]; ok { 385 continue 386 } 387 //将请求分配给该对等方 388 t.attempts[req.peer.id] = struct{}{} 389 req.items = append(req.items, hash) 390 req.tasks[hash] = t 391 delete(s.tasks, hash) 392 } 393 } 394 395 //进程迭代一批已交付状态数据,并注入每个项 396 //进入运行状态同步,重新排队请求但没有的任何项目 397 //交付。返回对等端是否实际成功地传递了 398 //值,以及发生的任何错误。 399 func (s *stateSync) process(req *stateReq) (int, error) { 400 //Collect processing stats and update progress if valid data was received 401 duplicate, unexpected, successful := 0, 0, 0 402 403 defer func(start time.Time) { 404 if duplicate > 0 || unexpected > 0 { 405 s.updateStats(0, duplicate, unexpected, time.Since(start)) 406 } 407 }(time.Now()) 408 409 //对所有传递的数据进行迭代,并逐个注入到trie中 410 for _, blob := range req.response { 411 _, hash, err := s.processNodeData(blob) 412 switch err { 413 case nil: 414 s.numUncommitted++ 415 s.bytesUncommitted += len(blob) 416 successful++ 417 case trie.ErrNotRequested: 418 unexpected++ 419 case trie.ErrAlreadyProcessed: 420 duplicate++ 421 default: 422 return successful, fmt.Errorf("invalid state node %s: %v", hash.TerminalString(), err) 423 } 424 if _, ok := req.tasks[hash]; ok { 425 delete(req.tasks, hash) 426 } 427 } 428 //将未完成的任务放回重试队列 429 npeers := s.d.peers.Len() 430 for hash, task := range req.tasks { 431 //If the node did deliver something, missing items may be due to a protocol 432 //限制或以前的超时+延迟传递。两种情况都应该允许 433 //要重试丢失项的节点(以避免单点暂停)。 434 if len(req.response) > 0 || req.timedOut() { 435 delete(task.attempts, req.peer.id) 436 } 437 //如果我们已经请求节点太多次,可能是恶意的 438 //在没有人拥有正确数据的地方同步。中止。 439 if len(task.attempts) >= npeers { 440 return successful, fmt.Errorf("state node %s failed with all peers (%d tries, %d peers)", hash.TerminalString(), len(task.attempts), npeers) 441 } 442 //缺少项,请放入重试队列。 443 s.tasks[hash] = task 444 } 445 return successful, nil 446 } 447 448 //processNodeData尝试插入从远程服务器传递的trie节点数据blob 449 //查看状态trie,返回是否编写了有用的内容或 450 //发生错误。 451 func (s *stateSync) processNodeData(blob []byte) (bool, common.Hash, error) { 452 res := trie.SyncResult{Data: blob} 453 s.keccak.Reset() 454 s.keccak.Write(blob) 455 s.keccak.Sum(res.Hash[:0]) 456 committed, _, err := s.sched.Process([]trie.SyncResult{res}) 457 return committed, res.Hash, err 458 } 459 460 //updateStats触发各种状态同步进度计数器并显示日志 461 //供用户查看的消息。 462 func (s *stateSync) updateStats(written, duplicate, unexpected int, duration time.Duration) { 463 s.d.syncStatsLock.Lock() 464 defer s.d.syncStatsLock.Unlock() 465 466 s.d.syncStatsState.pending = uint64(s.sched.Pending()) 467 s.d.syncStatsState.processed += uint64(written) 468 s.d.syncStatsState.duplicate += uint64(duplicate) 469 s.d.syncStatsState.unexpected += uint64(unexpected) 470 471 if written > 0 || duplicate > 0 || unexpected > 0 { 472 log.Info("Imported new state entries", "count", written, "elapsed", common.PrettyDuration(duration), "processed", s.d.syncStatsState.processed, "pending", s.d.syncStatsState.pending, "retry", len(s.tasks), "duplicate", s.d.syncStatsState.duplicate, "unexpected", s.d.syncStatsState.unexpected) 473 } 474 if written > 0 { 475 rawdb.WriteFastTrieProgress(s.d.stateDB, s.d.syncStatsState.processed) 476 } 477 } 478