github.com/cranelv/ethereum_mpc@v0.0.0-20191031014521-23aeb1415092/consensus_pbft/pbft/viewchange.go (about) 1 /* 2 Copyright IBM Corp. 2016 All Rights Reserved. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package pbft 18 19 import ( 20 "encoding/base64" 21 "fmt" 22 "reflect" 23 24 "github.com/ethereum/go-ethereum/consensus_pbft/util/events" 25 "github.com/ethereum/go-ethereum/consensus_pbft/message" 26 "github.com/ethereum/go-ethereum/consensus_pbft/pbftTypes" 27 "github.com/ethereum/go-ethereum/consensus_pbft/singletons" 28 ) 29 /* 30 这个机制下有一个叫视图view的概念,在一个视图里,一个是主节点,其余的都叫备份节点。 31 主节点负责将来自客户端的请求给排好序,然后按序发送给备份节点们。 32 但是主节点可能会是拜占庭的:它可能会给不同的请求编上相同的序号,或者不去分配序号,或者让相邻的序号不连续。 33 备份节点应当有职责来主动检查这些序号的合法性,并能通过timeout机制检测到主节点是否已经宕掉。 34 当出现这些异常情况时,这些备份节点就会触发视图更换view change协议来选举出新的主节点。 35 36 视图是连续编号的整数。主节点由公式p = v mod |R|计算得到,这里v是视图编号,p是副本编号,|R|是副本集合的个数。 37 当主节点失效的时候就需要启动视图更换(view change)过程。 38 */ 39 // viewChangeQuorumEvent is returned to the event loop when a new ViewChange message is received which is part of a quorum cert 40 type viewChangeQuorumEvent struct{} 41 42 func (instance *pbftCore) correctViewChange(vc *message.ViewChange) bool { 43 for _, p := range append(vc.Pset, vc.Qset...) { 44 if !(p.View < vc.View && p.SequenceNumber > vc.H && p.SequenceNumber <= vc.H+instance.L) { 45 singletons.Log.Debugf("Replica %d invalid p entry in view-change: vc(v:%d h:%d) p(v:%d n:%d)", 46 instance.id, vc.View, vc.H, p.View, p.SequenceNumber) 47 return false 48 } 49 } 50 51 for _, c := range vc.Cset { 52 // PBFT: the paper says c.n > vc.h 53 if !(c.SequenceNumber >= vc.H && c.SequenceNumber <= vc.H+instance.L) { 54 singletons.Log.Debugf("Replica %d invalid c entry in view-change: vc(v:%d h:%d) c(n:%d)", 55 instance.id, vc.View, vc.H, c.SequenceNumber) 56 return false 57 } 58 } 59 60 return true 61 } 62 63 func (instance *pbftCore) calcPSet() map[uint64]*message.ViewChange_PQ { 64 pset := make(map[uint64]*message.ViewChange_PQ) 65 66 for n, p := range instance.pset { 67 pset[n] = p 68 } 69 70 // P set: requests that have prepared here 71 // 72 // "<n,d,v> has a prepared certificate, and no request 73 // prepared in a later view with the same number" 74 75 for idx, cert := range instance.certStore { 76 if cert.prePrepare == nil { 77 continue 78 } 79 80 digest := cert.digest 81 if !instance.prepared(digest, idx.v, idx.n) { 82 continue 83 } 84 85 if p, ok := pset[idx.n]; ok && p.View > idx.v { 86 continue 87 } 88 89 pset[idx.n] = &message.ViewChange_PQ{ 90 SequenceNumber: idx.n, 91 BatchDigest: digest, 92 View: idx.v, 93 } 94 } 95 96 return pset 97 } 98 99 func (instance *pbftCore) calcQSet() map[qidx]*message.ViewChange_PQ { 100 qset := make(map[qidx]*message.ViewChange_PQ) 101 102 for n, q := range instance.qset { 103 qset[n] = q 104 } 105 106 // Q set: requests that have pre-prepared here (pre-prepare or 107 // prepare sent) 108 // 109 // "<n,d,v>: requests that pre-prepared here, and did not 110 // pre-prepare in a later view with the same number" 111 112 for idx, cert := range instance.certStore { 113 if cert.prePrepare == nil { 114 continue 115 } 116 117 digest := cert.digest 118 if !instance.prePrepared(digest, idx.v, idx.n) { 119 continue 120 } 121 122 qi := qidx{digest, idx.n} 123 if q, ok := qset[qi]; ok && q.View > idx.v { 124 continue 125 } 126 127 qset[qi] = &message.ViewChange_PQ{ 128 SequenceNumber: idx.n, 129 BatchDigest: digest, 130 View: idx.v, 131 } 132 } 133 134 return qset 135 } 136 137 func (instance *pbftCore) sendViewChange() events.Event { 138 instance.stopTimer() 139 140 delete(instance.newViewStore, instance.view) 141 instance.view++ 142 instance.activeView = false 143 144 instance.pset = instance.calcPSet() 145 instance.qset = instance.calcQSet() 146 147 // clear old messages 148 for idx := range instance.certStore { 149 if idx.v < instance.view { 150 delete(instance.certStore, idx) 151 } 152 } 153 for idx := range instance.viewChangeStore { 154 if idx.v < instance.view { 155 delete(instance.viewChangeStore, idx) 156 } 157 } 158 159 vc := &message.ViewChange{ 160 View: instance.view, 161 H: instance.h, 162 ReplicaId: instance.id, 163 } 164 165 for n, id := range instance.chkpts { 166 vc.Cset = append(vc.Cset, &message.ViewChange_C{ 167 SequenceNumber: n, 168 Id: id, 169 }) 170 } 171 172 for _, p := range instance.pset { 173 if p.SequenceNumber < instance.h { 174 singletons.Log.Errorf("BUG! Replica %d should not have anything in our pset less than h, found %+v", instance.id, p) 175 } 176 vc.Pset = append(vc.Pset, p) 177 } 178 179 for _, q := range instance.qset { 180 if q.SequenceNumber < instance.h { 181 singletons.Log.Errorf("BUG! Replica %d should not have anything in our qset less than h, found %+v", instance.id, q) 182 } 183 vc.Qset = append(vc.Qset, q) 184 } 185 186 instance.sign(vc) 187 188 singletons.Log.Infof("Replica %d sending view-change, v:%d, h:%d, |C|:%d, |P|:%d, |Q|:%d", 189 instance.id, vc.View, vc.H, len(vc.Cset), len(vc.Pset), len(vc.Qset)) 190 191 instance.innerBroadcast(vc) 192 193 instance.vcResendTimer.Reset(instance.vcResendTimeout, viewChangeResendTimerEvent{}) 194 195 return instance.recvViewChange(vc) 196 } 197 198 func (instance *pbftCore) recvViewChange(vc *message.ViewChange) events.Event { 199 singletons.Log.Infof("Replica %d received view-change from replica %d, v:%d, h:%d, |C|:%d, |P|:%d, |Q|:%d", 200 instance.id, vc.ReplicaId, vc.View, vc.H, len(vc.Cset), len(vc.Pset), len(vc.Qset)) 201 202 if err := instance.verify(vc); err != nil { 203 singletons.Log.Warnf("Replica %d found incorrect signature in view-change message: %s", instance.id, err) 204 return nil 205 } 206 207 if vc.View < instance.view { 208 singletons.Log.Warnf("Replica %d found view-change message for old view", instance.id) 209 return nil 210 } 211 212 if !instance.correctViewChange(vc) { 213 singletons.Log.Warnf("Replica %d found view-change message incorrect", instance.id) 214 return nil 215 } 216 217 if _, ok := instance.viewChangeStore[vcidx{vc.View, vc.ReplicaId}]; ok { 218 singletons.Log.Warnf("Replica %d already has a view change message for view %d from replica %d", instance.id, vc.View, vc.ReplicaId) 219 return nil 220 } 221 222 instance.viewChangeStore[vcidx{vc.View, vc.ReplicaId}] = vc 223 224 // PBFT TOCS 4.5.1 Liveness: "if a replica receives a set of 225 // f+1 valid VIEW-CHANGE messages from other replicas for 226 // views greater than its current view, it sends a VIEW-CHANGE 227 // message for the smallest view in the set, even if its timer 228 // has not expired" 229 replicas := make(map[pbftTypes.ReplicaID]bool) 230 minView := uint64(0) 231 for idx := range instance.viewChangeStore { 232 if idx.v <= instance.view { 233 continue 234 } 235 236 replicas[idx.id] = true 237 if minView == 0 || idx.v < minView { 238 minView = idx.v 239 } 240 } 241 242 // We only enter this if there are enough view change messages _greater_ than our current view 243 if uint32(len(replicas)) >= instance.f+1 { 244 singletons.Log.Infof("Replica %d received f+1 view-change messages, triggering view-change to view %d", 245 instance.id, minView) 246 // subtract one, because sendViewChange() increments 247 instance.view = minView - 1 248 return instance.sendViewChange() 249 } 250 251 quorum := uint32(0) 252 for idx := range instance.viewChangeStore { 253 if idx.v == instance.view { 254 quorum++ 255 } 256 } 257 singletons.Log.Debugf("Replica %d now has %d view change requests for view %d", instance.id, quorum, instance.view) 258 259 if !instance.activeView && vc.View == instance.view && quorum >= instance.allCorrectReplicasQuorum() { 260 instance.vcResendTimer.Stop() 261 instance.startTimer(instance.lastNewViewTimeout, "new view change") 262 instance.lastNewViewTimeout = 2 * instance.lastNewViewTimeout 263 return viewChangeQuorumEvent{} 264 } 265 266 return nil 267 } 268 269 func (instance *pbftCore) sendNewView() events.Event { 270 271 if _, ok := instance.newViewStore[instance.view]; ok { 272 singletons.Log.Debugf("Replica %d already has new view in store for view %d, skipping", instance.id, instance.view) 273 return nil 274 } 275 276 vset := instance.getViewChanges() 277 278 cp, ok, _ := instance.selectInitialCheckpoint(vset) 279 if !ok { 280 singletons.Log.Infof("Replica %d could not find consistent checkpoint: %+v", instance.id, instance.viewChangeStore) 281 return nil 282 } 283 284 msgList := instance.assignSequenceNumbers(vset, cp.SequenceNumber) 285 if msgList == nil { 286 singletons.Log.Infof("Replica %d could not assign sequence numbers for new view", instance.id) 287 return nil 288 } 289 290 nv := &message.NewView{ 291 View: instance.view, 292 Vset: vset, 293 Xset: msgList, 294 ReplicaId: instance.id, 295 } 296 297 singletons.Log.Infof("Replica %d is new primary, sending new-view, v:%d, X:%+v", 298 instance.id, nv.View, nv.Xset) 299 300 instance.innerBroadcast(nv) 301 instance.newViewStore[instance.view] = nv 302 return instance.processNewView() 303 } 304 305 func (instance *pbftCore) recvNewView(nv *message.NewView) events.Event { 306 singletons.Log.Infof("Replica %d received new-view %d", 307 instance.id, nv.View) 308 309 if !(nv.View > 0 && nv.View >= instance.view && instance.primary(nv.View) == nv.ReplicaId && instance.newViewStore[nv.View] == nil) { 310 singletons.Log.Infof("Replica %d rejecting invalid new-view from %d, v:%d", 311 instance.id, nv.ReplicaId, nv.View) 312 return nil 313 } 314 315 for _, vc := range nv.Vset { 316 if err := instance.verify(vc); err != nil { 317 singletons.Log.Warnf("Replica %d found incorrect view-change signature in new-view message: %s", instance.id, err) 318 return nil 319 } 320 } 321 322 instance.newViewStore[nv.View] = nv 323 return instance.processNewView() 324 } 325 326 func (instance *pbftCore) processNewView() events.Event { 327 var newReqBatchMissing bool 328 nv, ok := instance.newViewStore[instance.view] 329 if !ok { 330 singletons.Log.Errorf("Replica %d ignoring processNewView as it could not find view %d in its newViewStore", instance.id, instance.view) 331 return nil 332 } 333 334 if instance.activeView { 335 singletons.Log.Infof("Replica %d ignoring new-view from %d, v:%d: we are active in view %d", 336 instance.id, nv.ReplicaId, nv.View, instance.view) 337 return nil 338 } 339 340 cp, ok, replicas := instance.selectInitialCheckpoint(nv.Vset) 341 if !ok { 342 singletons.Log.Warnf("Replica %d could not determine initial checkpoint: %+v", 343 instance.id, instance.viewChangeStore) 344 return instance.sendViewChange() 345 } 346 347 speculativeLastExec := instance.lastExec 348 if instance.currentExec != nil { 349 speculativeLastExec = *instance.currentExec 350 } 351 352 // If we have not reached the sequence number, check to see if we can reach it without state transfer 353 // In general, executions are better than state transfer 354 if speculativeLastExec < cp.SequenceNumber { 355 canExecuteToTarget := true 356 outer: 357 for seqNo := speculativeLastExec + 1; seqNo <= cp.SequenceNumber; seqNo++ { 358 found := false 359 for idx, cert := range instance.certStore { 360 if idx.n != seqNo { 361 continue 362 } 363 364 quorum := uint32(0) 365 for _, p := range cert.commit { 366 // Was this committed in the previous view 367 if p.View == idx.v && p.SequenceNumber == seqNo { 368 quorum++ 369 } 370 } 371 372 if quorum < instance.intersectionQuorum() { 373 singletons.Log.Debugf("Replica %d missing quorum of commit certificate for seqNo=%d, only has %d of %d", instance.id, quorum, instance.intersectionQuorum()) 374 continue 375 } 376 377 found = true 378 break 379 } 380 381 if !found { 382 canExecuteToTarget = false 383 singletons.Log.Debugf("Replica %d missing commit certificate for seqNo=%d", instance.id, seqNo) 384 break outer 385 } 386 387 } 388 389 if canExecuteToTarget { 390 singletons.Log.Debugf("Replica %d needs to process a new view, but can execute to the checkpoint seqNo %d, delaying processing of new view", instance.id, cp.SequenceNumber) 391 return nil 392 } 393 394 singletons.Log.Infof("Replica %d cannot execute to the view change checkpoint with seqNo %d", instance.id, cp.SequenceNumber) 395 } 396 397 msgList := instance.assignSequenceNumbers(nv.Vset, cp.SequenceNumber) 398 if msgList == nil { 399 singletons.Log.Warnf("Replica %d could not assign sequence numbers: %+v", 400 instance.id, instance.viewChangeStore) 401 return instance.sendViewChange() 402 } 403 404 if !(len(msgList) == 0 && len(nv.Xset) == 0) && !reflect.DeepEqual(msgList, nv.Xset) { 405 singletons.Log.Warnf("Replica %d failed to verify new-view Xset: computed %+v, received %+v", 406 instance.id, msgList, nv.Xset) 407 return instance.sendViewChange() 408 } 409 410 if instance.h < cp.SequenceNumber { 411 instance.moveWatermarks(cp.SequenceNumber) 412 } 413 414 if speculativeLastExec < cp.SequenceNumber { 415 singletons.Log.Warnf("Replica %d missing base checkpoint %d (%s), our most recent execution %d", instance.id, cp.SequenceNumber, cp.Id, speculativeLastExec) 416 417 snapshotID, err := base64.StdEncoding.DecodeString(cp.Id) 418 if nil != err { 419 err = fmt.Errorf("Replica %d received a view change whose hash could not be decoded (%s)", instance.id, cp.Id) 420 singletons.Log.Error(err.Error()) 421 return nil 422 } 423 424 target := &stateUpdateTarget{ 425 checkpointMessage: checkpointMessage{ 426 seqNo: cp.SequenceNumber, 427 snapshotId: snapshotID, 428 }, 429 replicas: replicas, 430 } 431 432 instance.updateHighStateTarget(target) 433 instance.stateTransfer(target) 434 } 435 436 for n, d := range nv.Xset { 437 // PBFT: why should we use "h ≥ min{n | ∃d : (<n,d> ∈ X)}"? 438 // "h ≥ min{n | ∃d : (<n,d> ∈ X)} ∧ ∀<n,d> ∈ X : (n ≤ h ∨ ∃m ∈ in : (D(m) = d))" 439 if n <= instance.h { 440 continue 441 } else { 442 if d == "" { 443 // NULL request; skip 444 continue 445 } 446 447 if _, ok := instance.reqBatchStore[d]; !ok { 448 singletons.Log.Warnf("Replica %d missing assigned, non-checkpointed request batch %s", 449 instance.id, d) 450 if _, ok := instance.missingReqBatches[d]; !ok { 451 singletons.Log.Warnf("Replica %v requesting to fetch batch %s", 452 instance.id, d) 453 newReqBatchMissing = true 454 instance.missingReqBatches[d] = true 455 } 456 } 457 } 458 } 459 460 if len(instance.missingReqBatches) == 0 { 461 return instance.processNewView2(nv) 462 } else if newReqBatchMissing { 463 instance.fetchRequestBatches() 464 } 465 466 return nil 467 } 468 469 func (instance *pbftCore) processNewView2(nv *message.NewView) events.Event { 470 singletons.Log.Infof("Replica %d accepting new-view to view %d", instance.id, instance.view) 471 472 instance.stopTimer() 473 instance.nullRequestTimer.Stop() 474 475 instance.activeView = true 476 delete(instance.newViewStore, instance.view-1) 477 478 instance.seqNo = instance.h 479 for n, d := range nv.Xset { 480 if n <= instance.h { 481 continue 482 } 483 484 reqBatch, ok := instance.reqBatchStore[d] 485 if !ok && d != "" { 486 singletons.Log.Fatalf("Replica %d is missing request batch for seqNo=%d with digest '%s' for assigned prepare after fetching, this indicates a serious bug", instance.id, n, d) 487 } 488 preprep := &message.PrePrepare{ 489 View: instance.view, 490 SequenceNumber: n, 491 BatchDigest: d, 492 RequestBatch: reqBatch, 493 ReplicaId: instance.id, 494 } 495 cert := instance.getCert(instance.view, n) 496 cert.prePrepare = preprep 497 cert.digest = d 498 if n > instance.seqNo { 499 instance.seqNo = n 500 } 501 instance.persistQSet() 502 } 503 504 instance.updateViewChangeSeqNo() 505 506 if instance.primary(instance.view) != instance.id { 507 for n, d := range nv.Xset { 508 prep := &message.Prepare{ 509 View: instance.view, 510 SequenceNumber: n, 511 BatchDigest: d, 512 ReplicaId: instance.id, 513 } 514 if n > instance.h { 515 cert := instance.getCert(instance.view, n) 516 cert.sentPrepare = true 517 instance.recvPrepare(prep) 518 } 519 instance.innerBroadcast(prep) 520 } 521 } else { 522 singletons.Log.Debugf("Replica %d is now primary, attempting to resubmit requests", instance.id) 523 instance.resubmitRequestBatches() 524 } 525 526 instance.startTimerIfOutstandingRequests() 527 528 singletons.Log.Debugf("Replica %d done cleaning view change artifacts, calling into consumer", instance.id) 529 530 return viewChangedEvent{} 531 } 532 533 func (instance *pbftCore) getViewChanges() (vset []*message.ViewChange) { 534 for _, vc := range instance.viewChangeStore { 535 vset = append(vset, vc) 536 } 537 538 return 539 } 540 541 func (instance *pbftCore) selectInitialCheckpoint(vset []*message.ViewChange) (checkpoint message.ViewChange_C, ok bool, replicas []pbftTypes.ReplicaID) { 542 checkpoints := make(map[message.ViewChange_C][]*message.ViewChange) 543 for _, vc := range vset { 544 for _, c := range vc.Cset { // TODO, verify that we strip duplicate checkpoints from this set 545 checkpoints[*c] = append(checkpoints[*c], vc) 546 singletons.Log.Debugf("Replica %d appending checkpoint from replica %d with seqNo=%d, h=%d, and checkpoint digest %s", instance.id, vc.ReplicaId, vc.H, c.SequenceNumber, c.Id) 547 } 548 } 549 550 if len(checkpoints) == 0 { 551 singletons.Log.Debugf("Replica %d has no checkpoints to select from: %d %s", 552 instance.id, len(instance.viewChangeStore), checkpoints) 553 return 554 } 555 556 for idx, vcList := range checkpoints { 557 // need weak certificate for the checkpoint 558 if uint32(len(vcList)) <= instance.f { // type casting necessary to match types 559 singletons.Log.Debugf("Replica %d has no weak certificate for n:%d, vcList was %d long", 560 instance.id, idx.SequenceNumber, len(vcList)) 561 continue 562 } 563 564 quorum := uint32(0) 565 // Note, this is the whole vset (S) in the paper, not just this checkpoint set (S') (vcList) 566 // We need 2f+1 low watermarks from S below this seqNo from all replicas 567 // We need f+1 matching checkpoints at this seqNo (S') 568 for _, vc := range vset { 569 if vc.H <= idx.SequenceNumber { 570 quorum++ 571 } 572 } 573 574 if quorum < instance.intersectionQuorum() { 575 singletons.Log.Debugf("Replica %d has no quorum for n:%d", instance.id, idx.SequenceNumber) 576 continue 577 } 578 579 replicas = make([]pbftTypes.ReplicaID, len(vcList)) 580 for i, vc := range vcList { 581 replicas[i] = vc.ReplicaId 582 } 583 584 if checkpoint.SequenceNumber <= idx.SequenceNumber { 585 checkpoint = idx 586 ok = true 587 } 588 } 589 590 return 591 } 592 593 func (instance *pbftCore) assignSequenceNumbers(vset []*message.ViewChange, h uint64) (msgList map[uint64]pbftTypes.MessageDigest) { 594 msgList = make(map[uint64]pbftTypes.MessageDigest) 595 596 maxN := h + 1 597 598 // "for all n such that h < n <= h + L" 599 nLoop: 600 for n := h + 1; n <= h+instance.L; n++ { 601 // "∃m ∈ S..." 602 for _, m := range vset { 603 // "...with <n,d,v> ∈ m.P" 604 for _, em := range m.Pset { 605 quorum := uint32(0) 606 // "A1. ∃2f+1 messages m' ∈ S" 607 mpLoop: 608 for _, mp := range vset { 609 if mp.H >= n { 610 continue 611 } 612 // "∀<n,d',v'> ∈ m'.P" 613 for _, emp := range mp.Pset { 614 if n == emp.SequenceNumber && !(emp.View < em.View || (emp.View == em.View && emp.BatchDigest == em.BatchDigest)) { 615 continue mpLoop 616 } 617 } 618 quorum++ 619 } 620 621 if quorum < instance.intersectionQuorum() { 622 continue 623 } 624 625 quorum = 0 626 // "A2. ∃f+1 messages m' ∈ S" 627 for _, mp := range vset { 628 // "∃<n,d',v'> ∈ m'.Q" 629 for _, emp := range mp.Qset { 630 if n == emp.SequenceNumber && emp.View >= em.View && emp.BatchDigest == em.BatchDigest { 631 quorum++ 632 } 633 } 634 } 635 636 if quorum < instance.f+1 { 637 continue 638 } 639 640 // "then select the request with digest d for number n" 641 msgList[n] = em.BatchDigest 642 maxN = n 643 644 continue nLoop 645 } 646 } 647 648 quorum := uint32(0) 649 // "else if ∃2f+1 messages m ∈ S" 650 nullLoop: 651 for _, m := range vset { 652 // "m.P has no entry" 653 for _, em := range m.Pset { 654 if em.SequenceNumber == n { 655 continue nullLoop 656 } 657 } 658 quorum++ 659 } 660 661 if quorum >= instance.intersectionQuorum() { 662 // "then select the null request for number n" 663 msgList[n] = "" 664 665 continue nLoop 666 } 667 668 singletons.Log.Warnf("Replica %d could not assign value to contents of seqNo %d, found only %d missing P entries", instance.id, n, quorum) 669 return nil 670 } 671 672 // prune top null requests 673 for n, msg := range msgList { 674 if n > maxN && msg == "" { 675 delete(msgList, n) 676 } 677 } 678 679 return 680 }