github.com/bigzoro/my_simplechain@v0.0.0-20240315012955-8ad0a2a29bb9/consensus/hotstuff/council.go (about) 1 package hotstuff 2 3 import ( 4 "bytes" 5 "context" 6 "errors" 7 "sort" 8 "sync" 9 "time" 10 11 "github.com/bigzoro/my_simplechain/common" 12 "github.com/bigzoro/my_simplechain/consensus" 13 bls "github.com/bigzoro/my_simplechain/consensus/hotstuff/bls12-381" 14 hots "github.com/bigzoro/my_simplechain/consensus/hotstuff/common" 15 hotsptcl "github.com/bigzoro/my_simplechain/consensus/hotstuff/hotsptcl" 16 "github.com/bigzoro/my_simplechain/core/types" 17 logger "github.com/bigzoro/my_simplechain/log" 18 "github.com/bigzoro/my_simplechain/p2p/enode" 19 ) 20 21 var ( 22 // The maximum acceptable number of consensus phase windows exceeded for the current view. 23 maxProcessView uint64 = 1 << 5 24 25 viewPeriod = time.Second * 5 // The duration of each consensus phase window(view). 26 27 log = logger.New("module", "Hotstuff") 28 ) 29 30 var ( 31 // errViewLagged is returned when handling HotStuff messages with a view that is lagging behind. 32 errViewLagged = errors.New("event view lags behind the local detemined") 33 34 // errClosed is returned when a HotStuff message processing request is received after 35 // the engine has been shut down. 36 errEngineClosed = errors.New("Hotstuff engine closed") 37 ) 38 39 // expiration records remote timeout events to provide feedback on their events 40 // when local timeout occurs. 41 type expiration struct { 42 id hots.ID 43 view uint64 44 } 45 46 type expirations []*expiration 47 48 func (set expirations) Len() int { return len(set) } 49 50 func (set expirations) Swap(i, j int) { set[i], set[j] = set[j], set[i] } 51 52 func (set expirations) Less(i, j int) bool { 53 return set[i].view < set[j].view 54 } 55 56 // poll collects the agreements of certain consensus phase window from the Hotstuff replicas. 57 type poll struct { 58 done bool 59 60 start time.Time 61 view uint64 62 63 signatures bls.PartialSignatureSet 64 } 65 66 // polls tracks all the progress of vote collections for each consensus window 67 // in which the local replica acts as the leader. 68 69 // The latest poll starts, the oledst one expires. 70 type polls struct { 71 threshold int 72 count int 73 head, rear int 74 polls []poll 75 } 76 77 // reset cleans up all the previous collections and reset the threshold for 78 // signature aggregation. 79 func (p *polls) reset(threshold int) { 80 for i := 0; i < p.count; i++ { 81 index := (p.head + i) % len(p.polls) 82 p.polls[index].signatures = nil 83 } 84 p.threshold = threshold 85 p.count, p.head, p.rear = 0, 0, 0 86 } 87 88 // index returns the poll at the given index. 89 func (p *polls) index(index int) *poll { 90 return &p.polls[(p.head+index)%len(p.polls)] 91 } 92 93 // search finds the wanted poll in polls by a given view, and return it's index. 94 func (p *polls) search(view uint64) (int, bool) { 95 i := sort.Search(p.Len(), func(i int) bool { 96 return p.polls[(p.head+i)%len(p.polls)].view == view 97 }) 98 return i, i <= p.count && p.polls[(p.head+i)%len(p.polls)].view == view 99 } 100 101 // aggregatable returns if a poll of certain index is aggregatable. 102 func (p *polls) aggregatable(index int) bool { 103 return len(p.index(index).signatures) >= p.threshold 104 } 105 106 // start appends a new poll to the polls, assuming that all newly added polls 107 // are ordered and increase by view. 108 func (p *polls) start(view uint64, verified ...*bls.PartialSignature) *poll { 109 insert := &p.polls[p.rear] 110 insert.start = time.Now() 111 insert.view = view 112 insert.done = false 113 insert.signatures = make(bls.PartialSignatureSet, 0, p.threshold) 114 insert.signatures = append(insert.signatures, verified...) 115 p.rear = (p.rear + 1) % len(p.polls) 116 if p.count >= len(p.polls) { 117 p.head = (p.head + 1) % len(p.polls) 118 } else { 119 p.count++ 120 } 121 return insert 122 } 123 124 func (p *polls) Len() int { return p.count } 125 126 func (p *polls) Swap(i, j int) { 127 p.polls[(p.head+i)%len(p.polls)], p.polls[(p.head+j)%len(p.polls)] = p.polls[(p.head+j)%len(p.polls)], p.polls[(p.head+i)%len(p.polls)] 128 } 129 130 func (p *polls) Less(i, j int) bool { 131 return p.polls[(p.head+i)%len(p.polls)].view < p.polls[(p.head+j)%len(p.polls)].view 132 } 133 134 // cert is the certificate generated after consensus is reached. 135 type cert struct { 136 view uint64 137 blockBased common.Hash 138 snap *snapshot 139 140 signature *bls.AggregateSignature 141 } 142 143 // optimal maintains the prior certificate and publish it to the subscriber. 144 145 // If multiple subscribers subscribe to certificates based on the same block, the 146 // optimal will only publish the corresponding certificate to one of the subscribers. 147 type optimal struct { 148 mux sync.Mutex 149 view uint64 150 blockBased common.Hash 151 snap *snapshot 152 153 signature *bls.AggregateSignature 154 155 sent uint64 156 subscribes map[common.Hash]chan *cert 157 } 158 159 // reset initializes the state on which the optimal certificate is based and clears all 160 // old subscription. 161 func (op *optimal) reset(base common.Hash, snap *snapshot) { 162 op.mux.Lock() 163 defer op.mux.Unlock() 164 165 op.snap = snap 166 op.blockBased = base 167 op.view = 0 168 op.signature = nil 169 op.sent = 0 170 171 for sbase, sub := range op.subscribes { 172 if sbase != base { 173 close(sub) 174 delete(op.subscribes, sbase) 175 } 176 } 177 } 178 179 // update updates the prior certificate. 180 func (op *optimal) update(poll *poll) { 181 op.mux.Lock() 182 defer op.mux.Unlock() 183 184 if poll.view <= op.view { 185 return 186 } 187 op.view = poll.view 188 op.signature, _ = bls.Combine(poll.signatures...) 189 190 log.Debug("New quorum certificate", "view", op.view, "base", op.blockBased) 191 192 // If there is a certificate subscription on this block, publish this certificate. 193 if sub, ok := op.subscribes[op.blockBased]; ok { 194 select { 195 case sub <- &cert{view: op.view, blockBased: op.blockBased, signature: op.signature.Clone(), snap: op.snap}: 196 op.sent = op.view 197 default: 198 } 199 } 200 } 201 202 // Subscribe subscribes the optimal certificate of a certain block and receives the 203 // certificate at most once. 204 func (op *optimal) Subscribe(sub chan *cert, base common.Hash) { 205 op.mux.Lock() 206 defer op.mux.Unlock() 207 208 if op.view > op.sent && op.signature != nil && base == op.blockBased { 209 select { 210 case sub <- &cert{view: op.view, blockBased: op.blockBased, signature: op.signature.Clone(), snap: op.snap}: 211 op.sent = op.view 212 default: 213 } 214 } 215 op.subscribes[base] = sub 216 } 217 218 // poller oversees the process of legally generating quorum certificates by actively 219 // or passively facilitating consensus among HotStuff replicas. 220 type poller struct { 221 current uint64 222 223 expirations expirations 224 polls *polls 225 optimal *optimal 226 227 delayed map[uint64][]*bls.PartialSignature 228 } 229 230 func newPoller() *poller { 231 return &poller{ 232 optimal: &optimal{subscribes: make(map[common.Hash]chan *cert)}, 233 polls: &polls{polls: make([]poll, maxProcessView)}, 234 delayed: make(map[uint64][]*bls.PartialSignature), 235 } 236 } 237 238 // Reset starts a new consensus phase window and initialized the based state. 239 func (poller *poller) Reset(view uint64, base common.Hash, snap *snapshot) { 240 poller.current = view 241 poller.optimal.reset(base, snap) 242 poller.expirations = poller.expirations[:0] 243 244 // Discard all the expired partial signatures cache as the based state has changed. 245 poller.polls.reset(snap.Threshold()) 246 247 // Clean up the recorded remote timed out events. 248 for view := range poller.delayed { 249 delete(poller.delayed, view) 250 } 251 } 252 253 // Start starts a vote collection task. 254 func (poller *poller) Start(view uint64) { 255 log.Debug("Start collection task", "view", view) 256 257 poll := poller.polls.start(view, poller.delayed[view]...) // with the delayed votes. 258 if !poll.done && len(poll.signatures) >= poller.polls.threshold { 259 poll.done = true 260 poller.optimal.update(poll) 261 262 log.Debug("Finish collection task", "view", poll.view, "elapsed", common.PrettyDuration(time.Since(poll.start))) 263 } 264 delete(poller.delayed, view) 265 } 266 267 // Expire is an attempt to add a timed out event of a remote replica on a specific view 268 // to the timed out events queue, and returns whether it's timed out locally on the view. 269 func (poller *poller) Expire(view uint64, id hots.ID) bool { 270 271 // Process events within the valid range of view (detemined, current+maxProcessExpired). 272 if view > poller.current+maxProcessView { 273 return false 274 } 275 if view <= poller.current { 276 return true 277 } 278 279 i := sort.Search(len(poller.expirations), func(i int) bool { return poller.expirations[i].view == view }) 280 if i >= len(poller.expirations) || poller.expirations[i].view != view { 281 // Insert the unrecorded timed out event. 282 poller.expirations = append(poller.expirations, &expiration{id: id, view: view}) 283 sort.Sort(poller.expirations) 284 } 285 return false 286 } 287 288 // Collect collects signature votes for the agreement of consensus state transition. 289 func (poller *poller) Collect(view uint64, sig *bls.PartialSignature) { 290 i, ok := poller.polls.search(view) 291 if ok { 292 poll := poller.polls.index(i) 293 if dup(poll.signatures, sig) { // Filter out duplicate signatures. 294 return 295 } 296 poll.signatures = append(poll.signatures, sig) 297 298 // The condition for signature aggregation have been met, and the collection 299 // with the highest view can be the optimal source for the future block certificate. 300 if !poll.done && poller.polls.aggregatable(i) { 301 poll.done = true 302 poller.optimal.update(poll) 303 304 log.Debug("Finish collection task", "view", poll.view, "elapsed", common.PrettyDuration(time.Since(poll.start))) 305 } 306 307 } else { 308 if !dup(poller.delayed[view], sig) { // Filter out duplicate signatures. 309 poller.delayed[view] = append(poller.delayed[view], sig) 310 } 311 } 312 } 313 314 // Forward increases the current view and returns the triggered remote replica timed out events. 315 func (poller *poller) Forward() (uint64, expirations) { 316 poller.current++ 317 remove := make(expirations, 0, 1) 318 for i := 0; i < len(poller.expirations) && poller.expirations[i].view <= poller.current; i++ { 319 remove = append(remove, poller.expirations[i]) 320 } 321 if len(remove) > 0 { 322 poller.expirations = append(poller.expirations[:0], poller.expirations[len(remove):]...) 323 } 324 325 return poller.current, remove 326 } 327 328 // vote ancapsulates a vote event from a remote replica. 329 type vote struct { 330 id hots.ID 331 view uint64 332 333 // The vote event includes block information, which helps prevent the failure 334 // of valid vote message verification due to delayed reception of proposal messages. 335 block []byte 336 signature *bls.PartialSignature 337 } 338 339 // timeout ancapsulates a timed out event from a remote replica. 340 type timeout struct { 341 id hots.ID 342 view uint64 343 } 344 345 // transmition is responsible for sending or broadcasting messages to the corresponding 346 // replicas during the HotStuff consensus process. 347 type transmition interface { 348 protocol 349 350 Timeout(view uint64) error 351 Vote(remote hots.ID, view uint64, block common.Hash, sig *bls.PartialSignature) error 352 353 GetEnode(ids []hots.ID) []enode.ID 354 } 355 356 type base struct { 357 mux sync.RWMutex 358 block common.Hash 359 360 snap *snapshot 361 determined uint64 362 } 363 364 // Council is the Hotstuff consensus engine. 365 type Council struct { 366 *Legal 367 368 ctx context.Context 369 cancel context.CancelFunc 370 371 // The identity for the local Hotstuff replica. 372 id hots.ID 373 key *bls.PrivateKey 374 375 // The initial state at the beginning of each round of consensus. 376 base base 377 378 // The events that drive the state transition in HotStuff. 379 newHead chan *types.Header 380 vote chan *vote 381 timeout chan *timeout 382 transmition transmition 383 384 poller *poller 385 delayed map[uint64][]*vote 386 387 navigation *navigation 388 } 389 390 func NewCouncil(lg *Legal, id hots.ID, key *bls.PrivateKey) *Council { 391 ctx, cancel := context.WithCancel(context.Background()) 392 c := &Council{ 393 Legal: lg, 394 395 ctx: ctx, 396 cancel: cancel, 397 id: id, 398 key: key, 399 newHead: make(chan *types.Header), 400 vote: make(chan *vote), 401 timeout: make(chan *timeout), 402 delayed: make(map[uint64][]*vote), 403 poller: newPoller(), 404 navigation: newNavigation(), 405 } 406 hub := hotsptcl.NewHub(c) 407 c.transmition = hub 408 409 go c.schedule(ctx) 410 411 return c 412 } 413 414 // MakeService returns Hotstuff protocol handler service which implements node.Service. 415 // Note, the method should only be called once during the registration of an Ethereum 416 // node service. 417 func (c *Council) MakeService(chain consensus.ChainReader) *Service { 418 return &Service{protocol: c.transmition, chain: chain} 419 } 420 421 // Verify verifies the validity of a partial signature. 422 func (c *Council) VerifyAt(header *types.Header, sig *bls.PartialSignature, text []byte) error { 423 snaphash, _ := extractSnapshot(header, true) 424 425 snap, err := c.snapshot(snaphash) 426 if err != nil { 427 return err 428 } 429 430 return sig.Verify(snap, text) 431 } 432 433 // Sign signs the ciphertext using the local private key 434 func (c *Council) Sign(text []byte) (*bls.PartialSignature, error) { 435 return c.key.SignWithId(text, c.id) 436 } 437 438 // Vote notifies the council of the remote voting events. 439 func (c *Council) Vote(id hots.ID, view uint64, block []byte, sig *bls.PartialSignature) error { 440 select { 441 case c.vote <- &vote{id: id, view: view, block: block, signature: sig}: 442 case <-c.ctx.Done(): 443 return errEngineClosed 444 } 445 return nil 446 } 447 448 // Timeout notifies the council of the remote timed out events. 449 func (c *Council) Timeout(id hots.ID, view uint64) error { 450 select { 451 case c.timeout <- &timeout{id: id, view: view}: 452 case <-c.ctx.Done(): 453 return errEngineClosed 454 } 455 return nil 456 } 457 458 // Prior implements handler.priorBroadcastSelector to choose the prior peers for 459 // broadcasting the block. 460 func (c *Council) Prior(block *types.Block) []enode.ID { 461 snap, err := c.snapshot(common.BytesToHash(block.Header().Extra[:snapshotLen])) 462 if err != nil { 463 return nil 464 } 465 466 view := block.Header().Nonce.Uint64() 467 peers := make([]hots.ID, 0, snap.Threshold()-1) 468 for len(peers) < cap(peers) { 469 peers = append(peers, snap.Leader(view, c.rot)) 470 view++ 471 } 472 473 return c.transmition.GetEnode(peers) 474 } 475 476 func (c *Council) Close() error { 477 c.cancel() 478 c.transmition.Close() 479 return nil 480 } 481 482 // schedule is responsible for synchronizing the view state with remote nodes and managing 483 // its own view state machine. Additionally, A takes charge with vote collection to generate 484 // quorum certificates for proposing blocks. 485 func (c *Council) schedule(ctx context.Context) { 486 timer := time.NewTimer(viewPeriod) 487 loop: 488 for { 489 select { 490 case header := <-c.newHead: 491 if err := c.onPropose(header); err != nil { 492 log.Error("Process proposal", "error", err) 493 } 494 495 timer.Reset(viewPeriod) 496 497 case timeout := <-c.timeout: 498 if c.base.snap == nil { 499 continue loop 500 } 501 if err := c.checkTimeout(timeout); err != nil { 502 log.Debug("Disard remote timeout event", "error", err, "peer", timeout.id, "view", timeout.view) 503 continue loop 504 } 505 506 if c.poller.Expire(timeout.view, timeout.id) { 507 sig, err := c.sign(timeout.view, c.base.block.Bytes()) 508 if err == nil { 509 c.transmition.Vote(timeout.id, timeout.view, c.base.block, sig) 510 } 511 } 512 513 case vote := <-c.vote: 514 if c.base.snap == nil || !bytes.Equal(vote.block, c.base.block.Bytes()) { 515 log.Debug("Add vote for future", "view", vote.view, "id", vote.id, "base", common.BytesToHash(vote.block), "want", c.base.block) 516 c.delayed[vote.view] = append(c.delayed[vote.view], vote) 517 continue loop 518 } 519 520 // Receive votes based on same proposal. 521 522 if err := c.checkVote(vote); err != nil { 523 log.Debug("Invalid vote event", "error", err, "peer", vote.id, "view", vote.view) 524 continue loop 525 } 526 527 log.Debug("Receive vote", "view", vote.view, "id", vote.id, "base", common.BytesToHash(vote.block)) 528 c.poller.Collect(vote.view, vote.signature) 529 530 case <-timer.C: 531 if err := c.onTimeout(); err != nil { 532 log.Error("Process local timeout", "error", err) 533 } 534 timer.Reset(viewPeriod) 535 536 case <-ctx.Done(): 537 return 538 } 539 } 540 } 541 542 // onPropose processes incoming proposals. 543 func (c *Council) onPropose(header *types.Header) error { 544 snap, err := c.snapshot(common.BytesToHash(header.Extra[:snapshotLen])) 545 if err != nil { 546 return err 547 } 548 if ev, _ := extractReplicaEvent(header, true); ev != nil { 549 snap = ev.apply(snap) 550 if err := c.store(snap); err != nil { 551 return err 552 } 553 } 554 555 view := header.Nonce.Uint64() 556 hash := header.Hash() 557 558 c.base.mux.Lock() 559 c.base.determined = view - 1 560 c.base.block = hash 561 c.base.snap = snap 562 c.base.mux.Unlock() 563 564 log.Debug("Based on new consensus state", "number", header.Number.Uint64(), "hash", hash) 565 // todo handle parent block extra event(add HotStuff peer) 566 567 // Reset the task manager to be based on the latest consensus canonical block. 568 c.poller.Reset(view, hash, snap) 569 570 defer func() { 571 for view := range c.delayed { 572 if view <= c.base.determined { 573 delete(c.delayed, view) 574 } 575 } 576 }() 577 578 // todo emit HotStuff block event 579 580 leader := snap.Leader(view, c.rot) 581 if c.id != leader { 582 // Sign the proposal block and send the vote. 583 sig, err := c.sign(view, hash.Bytes()) 584 if err != nil { 585 return err 586 } 587 return c.transmition.Vote(leader, view, hash, sig) 588 } 589 590 // If I am the leader for the next round, fulfill the obligation of collecting 591 // votes for this round. 592 c.poller.Start(view) 593 594 // Validate and add delayed votes that belonging to this new collection task. 595 delayed := c.delayed[view] 596 for _, vote := range delayed { 597 if !bytes.Equal(vote.block, hash.Bytes()) { 598 continue 599 } 600 if err := c.checkVote(vote); err != nil { 601 log.Debug("Invalid vote event", "error", err, "peer", vote.id, "view", vote.view) 602 continue 603 } 604 605 c.poller.Collect(vote.view, vote.signature) 606 } 607 608 // Involves the partial signature signed by leader. 609 sig, err := c.sign(view, hash.Bytes()) 610 if err != nil { 611 return err 612 } 613 c.poller.Collect(view, sig) 614 615 return nil 616 } 617 618 // onTimeout handles or responds to local or remote timeout events 619 func (c *Council) onTimeout() error { 620 if c.base.snap == nil { 621 return nil 622 } 623 624 cur, exprs := c.poller.Forward() 625 626 log.Debug("Local time out", "view", cur-1) 627 if c.base.snap.Leader(cur, c.rot) == c.id { 628 sig, err := c.sign(cur, c.base.block.Bytes()) 629 if err != nil { 630 return err 631 } 632 c.poller.Start(cur) 633 c.poller.Collect(cur, sig) 634 c.transmition.Timeout(cur) 635 636 } else { 637 for i := range exprs { 638 sig, err := c.sign(exprs[i].view, c.base.block.Bytes()) 639 if err != nil { 640 return nil 641 } 642 c.transmition.Vote(exprs[i].id, exprs[i].view, c.base.block, sig) 643 } 644 } 645 646 return nil 647 } 648 649 func (c *Council) checkTimeout(event *timeout) error { 650 if event.view <= c.base.determined { 651 return errViewLagged 652 } 653 654 if event.id != c.base.snap.Leader(event.view, c.rot) { 655 return errors.New("not leader") 656 } 657 return nil 658 } 659 660 func (c *Council) checkVote(event *vote) error { 661 if event.view <= c.base.determined { 662 return errViewLagged 663 } 664 665 return event.signature.Verify(c.base.snap, legacyQuorumCertDigest(event.view, event.block).Bytes()) 666 } 667 668 func (c *Council) sign(view uint64, block []byte) (*bls.PartialSignature, error) { 669 return c.key.SignWithId(legacyQuorumCertDigest(view, block).Bytes(), c.id) 670 } 671 672 // duplicate signatures check when there are few signatures. 673 func dup(origin []*bls.PartialSignature, new *bls.PartialSignature) bool { 674 for _, sig := range origin { 675 if sig.ID() == new.ID() { 676 return true 677 } 678 } 679 return false 680 }