github.com/swiftstack/proxyfs@v0.0.0-20201223034610-5434d919416e/liveness/states.go (about) 1 package liveness 2 3 import ( 4 "crypto/rand" 5 "fmt" 6 "reflect" 7 "runtime" 8 "time" 9 10 "github.com/swiftstack/ProxyFS/inode" 11 "github.com/swiftstack/ProxyFS/logger" 12 ) 13 14 func stateMachine() { 15 for { 16 globals.nextState() 17 } 18 } 19 20 func doCandidate() { 21 var ( 22 awaitingResponses map[*peerStruct]struct{} 23 durationDelta time.Duration 24 err error 25 livenessReportWhileCandidate *internalLivenessReportStruct 26 msgAsFetchLivenessReportRequest *FetchLivenessReportRequestStruct 27 msgAsFetchLivenessReportResponse *FetchLivenessReportResponseStruct 28 msgAsHeartBeatRequest *HeartBeatRequestStruct 29 msgAsHeartBeatResponse *HeartBeatResponseStruct 30 msgAsRequestVoteRequest *RequestVoteRequestStruct 31 msgAsRequestVoteResponse *RequestVoteResponseStruct 32 ok bool 33 recvMsgQueueElement *recvMsgQueueElementStruct 34 peer *peerStruct 35 peers []*peerStruct 36 randByteBuf []byte 37 requestVoteSuccessfulResponses uint64 38 requestVoteSuccessfulResponsesRequiredForQuorum uint64 39 requestVoteExpirationTime time.Time 40 requestVoteExpirationDurationRemaining time.Duration 41 requestVoteMsgTag uint64 42 timeNow time.Time 43 ) 44 45 if LogLevelStateChanges <= globals.logLevel { 46 logger.Infof("%s entered Candidate state", globals.myUDPAddr) 47 } 48 49 // Point all LivenessCheckAssignments at globals.whoAmI 50 51 globals.Lock() 52 livenessReportWhileCandidate = computeLivenessCheckAssignments([]string{globals.whoAmI}) 53 updateMyObservingPeerReportWhileLocked(livenessReportWhileCandidate.observingPeer[globals.whoAmI]) 54 globals.Unlock() 55 globals.livenessCheckerControlChan <- true 56 57 // Attempt to start a new term 58 59 globals.currentTerm++ 60 61 if 0 == len(globals.peersByTuple) { 62 // Special one peer cluster case... there will be no RequestForVote Responses, so just convert to Leader 63 globals.nextState = doLeader 64 return 65 } 66 67 // Issue RequestVoteRequest to all other Peers 68 69 requestVoteMsgTag = fetchNonce() 70 71 msgAsRequestVoteRequest = &RequestVoteRequestStruct{ 72 MsgType: MsgTypeRequestVoteRequest, 73 MsgTag: requestVoteMsgTag, 74 CandidateTerm: globals.currentTerm, 75 } 76 77 peers, err = sendMsg(nil, msgAsRequestVoteRequest) 78 if nil != err { 79 panic(err) 80 } 81 82 awaitingResponses = make(map[*peerStruct]struct{}) 83 84 for _, peer = range peers { 85 awaitingResponses[peer] = struct{}{} 86 } 87 88 // Minimize split votes by picking a requestVoteExpirationTime at some random 89 // point between globals.heartbeatDuration and globals.heartbeatMissDuration 90 91 randByteBuf = make([]byte, 1) 92 _, err = rand.Read(randByteBuf) 93 if nil != err { 94 err = fmt.Errorf("rand.Read(randByteBuf) failed: %v", err) 95 panic(err) 96 } 97 98 durationDelta = globals.heartbeatMissDuration - globals.heartbeatDuration 99 durationDelta *= time.Duration(randByteBuf[0]) 100 durationDelta /= time.Duration(0x100) 101 durationDelta += globals.heartbeatDuration 102 103 requestVoteExpirationTime = time.Now().Add(durationDelta) 104 105 requestVoteSuccessfulResponsesRequiredForQuorum = (uint64(len(awaitingResponses)) + 1) / 2 106 requestVoteSuccessfulResponses = 0 107 108 for { 109 timeNow = time.Now() 110 111 if timeNow.After(requestVoteExpirationTime) || timeNow.Equal(requestVoteExpirationTime) { 112 // Simply return to try again 113 return 114 } 115 116 requestVoteExpirationDurationRemaining = requestVoteExpirationTime.Sub(timeNow) 117 118 select { 119 case <-globals.stateMachineStopChan: 120 globals.stateMachineDone.Done() 121 runtime.Goexit() 122 case <-globals.recvMsgChan: 123 recvMsgQueueElement = popGlobalMsg() 124 if nil != recvMsgQueueElement { 125 peer = recvMsgQueueElement.peer 126 switch recvMsgQueueElement.msgType { 127 case MsgTypeHeartBeatRequest: 128 msgAsHeartBeatRequest = recvMsgQueueElement.msg.(*HeartBeatRequestStruct) 129 if msgAsHeartBeatRequest.LeaderTerm < globals.currentTerm { 130 // Ignore it 131 } else if msgAsHeartBeatRequest.LeaderTerm == globals.currentTerm { 132 // Somebody else must have won the election... so convert to Follower 133 globals.currentLeader = peer 134 msgAsHeartBeatResponse = &HeartBeatResponseStruct{ 135 MsgType: MsgTypeHeartBeatResponse, 136 MsgTag: msgAsHeartBeatRequest.MsgTag, 137 CurrentTerm: globals.currentTerm, 138 Success: true, 139 } 140 _, err = sendMsg(peer, msgAsHeartBeatResponse) 141 if nil != err { 142 panic(err) 143 } 144 globals.nextState = doFollower 145 return 146 } else { // msgAsHeartBeatRequest.LeaderTerm > globals.currentTerm 147 globals.currentTerm = msgAsHeartBeatRequest.LeaderTerm 148 // We missed a subsequent election, so convert to Follower state 149 globals.currentLeader = peer 150 msgAsHeartBeatResponse = &HeartBeatResponseStruct{ 151 MsgType: MsgTypeHeartBeatResponse, 152 MsgTag: msgAsHeartBeatRequest.MsgTag, 153 CurrentTerm: globals.currentTerm, 154 Success: true, 155 } 156 _, err = sendMsg(peer, msgAsHeartBeatResponse) 157 if nil != err { 158 panic(err) 159 } 160 globals.nextState = doFollower 161 return 162 } 163 case MsgTypeHeartBeatResponse: 164 msgAsHeartBeatResponse = recvMsgQueueElement.msg.(*HeartBeatResponseStruct) 165 if msgAsHeartBeatResponse.CurrentTerm < globals.currentTerm { 166 // Ignore it 167 } else if msgAsHeartBeatResponse.CurrentTerm == globals.currentTerm { 168 // Unexpected... so convert to Follower state 169 globals.nextState = doFollower 170 return 171 } else { // msgAsHeartBeatResponse.CurrentTerm > globals.currentTerm 172 globals.currentTerm = msgAsHeartBeatResponse.CurrentTerm 173 // Convert to Follower state 174 globals.nextState = doFollower 175 return 176 } 177 case MsgTypeRequestVoteRequest: 178 msgAsRequestVoteRequest = recvMsgQueueElement.msg.(*RequestVoteRequestStruct) 179 if msgAsRequestVoteRequest.CandidateTerm < globals.currentTerm { 180 // Ignore it 181 } else if msgAsRequestVoteRequest.CandidateTerm == globals.currentTerm { 182 // We voted for ourself, so vote no 183 msgAsRequestVoteResponse = &RequestVoteResponseStruct{ 184 MsgType: MsgTypeRequestVoteResponse, 185 MsgTag: msgAsRequestVoteRequest.MsgTag, 186 CurrentTerm: globals.currentTerm, 187 VoteGranted: false, 188 } 189 _, err = sendMsg(peer, msgAsRequestVoteResponse) 190 if nil != err { 191 panic(err) 192 } 193 } else { // msgAsRequestVoteRequest.CandidateTerm > globals.currentTerm 194 globals.currentTerm = msgAsRequestVoteRequest.CandidateTerm 195 // Abandon our election, vote yes, and convert to Follower 196 globals.currentLeader = nil 197 globals.currentVote = peer 198 msgAsRequestVoteResponse = &RequestVoteResponseStruct{ 199 MsgType: MsgTypeRequestVoteResponse, 200 MsgTag: msgAsRequestVoteRequest.MsgTag, 201 CurrentTerm: globals.currentTerm, 202 VoteGranted: true, 203 } 204 _, err = sendMsg(peer, msgAsRequestVoteResponse) 205 if nil != err { 206 panic(err) 207 } 208 globals.nextState = doFollower 209 return 210 } 211 case MsgTypeRequestVoteResponse: 212 msgAsRequestVoteResponse = recvMsgQueueElement.msg.(*RequestVoteResponseStruct) 213 if msgAsRequestVoteResponse.CurrentTerm < globals.currentTerm { 214 // Ignore it 215 } else if msgAsRequestVoteResponse.CurrentTerm == globals.currentTerm { 216 if requestVoteMsgTag == msgAsRequestVoteResponse.MsgTag { 217 // If this is an unduplicated VoteGranted==true response, check if we are now Leader 218 _, ok = awaitingResponses[peer] 219 if ok { 220 delete(awaitingResponses, peer) 221 if msgAsRequestVoteResponse.VoteGranted { 222 requestVoteSuccessfulResponses++ 223 if requestVoteSuccessfulResponses >= requestVoteSuccessfulResponsesRequiredForQuorum { 224 // Convert to Leader 225 globals.nextState = doLeader 226 return 227 } 228 } 229 } 230 } else { 231 // Unexpected... but ignore it 232 } 233 } else { // msgAsRequestVoteResponse.CurrentTerm > globals.currentTerm 234 globals.currentTerm = msgAsRequestVoteResponse.CurrentTerm 235 // Unexpected... so convert to Follower state 236 globals.nextState = doFollower 237 return 238 } 239 case MsgTypeFetchLivenessReportRequest: 240 msgAsFetchLivenessReportRequest = recvMsgQueueElement.msg.(*FetchLivenessReportRequestStruct) 241 if msgAsFetchLivenessReportRequest.CurrentTerm < globals.currentTerm { 242 // Ignore it 243 } else if msgAsFetchLivenessReportRequest.CurrentTerm == globals.currentTerm { 244 // Unexpected... reject it 245 msgAsFetchLivenessReportResponse = &FetchLivenessReportResponseStruct{ 246 MsgType: MsgTypeFetchLivenessReportResponse, 247 MsgTag: msgAsFetchLivenessReportRequest.MsgTag, 248 CurrentTerm: globals.currentTerm, 249 CurrentLeader: "", 250 Success: false, 251 LivenessReport: nil, 252 } 253 _, err = sendMsg(peer, msgAsRequestVoteResponse) 254 if nil != err { 255 panic(err) 256 } 257 } else { // msgAsFetchLivenessReportRequest.CurrentTerm > globals.currentTerm 258 globals.currentTerm = msgAsRequestVoteResponse.CurrentTerm 259 // Unexpected... reject it and convert to Follower state 260 msgAsFetchLivenessReportResponse = &FetchLivenessReportResponseStruct{ 261 MsgType: MsgTypeFetchLivenessReportResponse, 262 MsgTag: msgAsFetchLivenessReportRequest.MsgTag, 263 CurrentTerm: globals.currentTerm, 264 CurrentLeader: "", 265 Success: false, 266 LivenessReport: nil, 267 } 268 _, err = sendMsg(peer, msgAsRequestVoteResponse) 269 if nil != err { 270 panic(err) 271 } 272 globals.nextState = doFollower 273 return 274 } 275 case MsgTypeFetchLivenessReportResponse: 276 msgAsFetchLivenessReportResponse = recvMsgQueueElement.msg.(*FetchLivenessReportResponseStruct) 277 if msgAsFetchLivenessReportResponse.CurrentTerm < globals.currentTerm { 278 // Ignore it 279 } else if msgAsFetchLivenessReportResponse.CurrentTerm == globals.currentTerm { 280 // Unexpected... so convert to Follower state 281 globals.nextState = doFollower 282 return 283 } else { // msgAsFetchLivenessReportResponse.CurrentTerm > globals.currentTerm 284 globals.currentTerm = msgAsHeartBeatResponse.CurrentTerm 285 // Convert to Follower state 286 globals.nextState = doFollower 287 return 288 } 289 default: 290 err = fmt.Errorf("Unexpected recvMsgQueueElement.msg: %v", reflect.TypeOf(recvMsgQueueElement.msg)) 291 panic(err) 292 } 293 } 294 case <-time.After(requestVoteExpirationDurationRemaining): 295 // We didn't win... but nobody else claims to have either.. so simply return to try again 296 return 297 } 298 } 299 } 300 301 func doFollower() { 302 var ( 303 err error 304 heartbeatMissTime time.Time 305 heartbeatMissDurationRemaining time.Duration 306 msgAsFetchLivenessReportRequest *FetchLivenessReportRequestStruct 307 msgAsFetchLivenessReportResponse *FetchLivenessReportResponseStruct 308 msgAsHeartBeatRequest *HeartBeatRequestStruct 309 msgAsHeartBeatResponse *HeartBeatResponseStruct 310 msgAsRequestVoteRequest *RequestVoteRequestStruct 311 msgAsRequestVoteResponse *RequestVoteResponseStruct 312 observedPeerReport *ObservingPeerStruct 313 peer *peerStruct 314 recvMsgQueueElement *recvMsgQueueElementStruct 315 timeNow time.Time 316 ) 317 318 if LogLevelStateChanges <= globals.logLevel { 319 logger.Infof("%s entered Follower state", globals.myUDPAddr) 320 } 321 322 heartbeatMissTime = time.Now().Add(globals.heartbeatMissDuration) 323 324 for { 325 timeNow = time.Now() 326 327 if timeNow.After(heartbeatMissTime) || timeNow.Equal(heartbeatMissTime) { 328 globals.nextState = doCandidate 329 return 330 } 331 332 heartbeatMissDurationRemaining = heartbeatMissTime.Sub(timeNow) 333 334 select { 335 case <-globals.stateMachineStopChan: 336 globals.stateMachineDone.Done() 337 runtime.Goexit() 338 case <-globals.recvMsgChan: 339 recvMsgQueueElement = popGlobalMsg() 340 if nil != recvMsgQueueElement { 341 peer = recvMsgQueueElement.peer 342 switch recvMsgQueueElement.msgType { 343 case MsgTypeHeartBeatRequest: 344 msgAsHeartBeatRequest = recvMsgQueueElement.msg.(*HeartBeatRequestStruct) 345 if msgAsHeartBeatRequest.LeaderTerm < globals.currentTerm { 346 // Ignore it 347 } else if msgAsHeartBeatRequest.LeaderTerm == globals.currentTerm { 348 // In case this is the first, record .currentLeader 349 globals.currentLeader = peer 350 globals.currentVote = nil 351 // Update RWMode 352 globals.curRWMode = msgAsHeartBeatRequest.NewRWMode 353 err = inode.SetRWMode(globals.curRWMode) 354 if nil != err { 355 logger.FatalfWithError(err, "inode.SetRWMode(%d) failed", globals.curRWMode) 356 } 357 // Compute msgAsHeartBeatResponse.Observed & reset globals.myObservingPeerReport 358 globals.Lock() 359 observedPeerReport = convertInternalToExternalObservingPeerReport(globals.myObservingPeerReport) 360 updateMyObservingPeerReportWhileLocked(convertExternalToInternalObservingPeerReport(msgAsHeartBeatRequest.ToObserve)) 361 globals.Unlock() 362 globals.livenessCheckerControlChan <- true 363 // Send HeartBeat response 364 msgAsHeartBeatResponse = &HeartBeatResponseStruct{ 365 MsgType: MsgTypeHeartBeatResponse, 366 MsgTag: msgAsHeartBeatRequest.MsgTag, 367 CurrentTerm: globals.currentTerm, 368 Success: true, 369 Observed: observedPeerReport, 370 } 371 _, err = sendMsg(peer, msgAsHeartBeatResponse) 372 if nil != err { 373 panic(err) 374 } 375 // Reset heartBeatMissTime 376 heartbeatMissTime = time.Now().Add(globals.heartbeatMissDuration) 377 } else { // msgAsHeartBeatRequest.LeaderTerm > globals.currentTerm 378 globals.currentTerm = msgAsHeartBeatRequest.LeaderTerm 379 // We missed out on Leader election, so record .currentLeader 380 globals.currentLeader = peer 381 globals.currentVote = nil 382 // Update RWMode 383 globals.curRWMode = msgAsHeartBeatRequest.NewRWMode 384 err = inode.SetRWMode(globals.curRWMode) 385 if nil != err { 386 logger.FatalfWithError(err, "inode.SetRWMode(%d) failed", globals.curRWMode) 387 } 388 // Compute msgAsHeartBeatResponse.Observed & reset globals.myObservingPeerReport 389 globals.Lock() 390 observedPeerReport = convertInternalToExternalObservingPeerReport(globals.myObservingPeerReport) 391 updateMyObservingPeerReportWhileLocked(convertExternalToInternalObservingPeerReport(msgAsHeartBeatRequest.ToObserve)) 392 globals.Unlock() 393 globals.livenessCheckerControlChan <- true 394 // Send HeartBeat response 395 msgAsHeartBeatResponse = &HeartBeatResponseStruct{ 396 MsgType: MsgTypeHeartBeatResponse, 397 MsgTag: msgAsHeartBeatRequest.MsgTag, 398 CurrentTerm: globals.currentTerm, 399 Success: true, 400 Observed: observedPeerReport, 401 } 402 _, err = sendMsg(peer, msgAsHeartBeatResponse) 403 if nil != err { 404 panic(err) 405 } 406 // Reset heartBeatMissTime 407 heartbeatMissTime = time.Now().Add(globals.heartbeatMissDuration) 408 } 409 case MsgTypeHeartBeatResponse: 410 msgAsHeartBeatResponse = recvMsgQueueElement.msg.(*HeartBeatResponseStruct) 411 if msgAsHeartBeatResponse.CurrentTerm < globals.currentTerm { 412 // Ignore it 413 } else if msgAsHeartBeatResponse.CurrentTerm == globals.currentTerm { 414 // Unexpected... but ignore it 415 } else { // msgAsHeartBeatResponse.CurrentTerm > globals.currentTerm 416 globals.currentTerm = msgAsHeartBeatResponse.CurrentTerm 417 // Unexpected... but ignore it 418 } 419 case MsgTypeRequestVoteRequest: 420 msgAsRequestVoteRequest = recvMsgQueueElement.msg.(*RequestVoteRequestStruct) 421 if msgAsRequestVoteRequest.CandidateTerm < globals.currentTerm { 422 // Reject it 423 msgAsRequestVoteResponse = &RequestVoteResponseStruct{ 424 MsgType: MsgTypeRequestVoteResponse, 425 MsgTag: msgAsRequestVoteRequest.MsgTag, 426 CurrentTerm: globals.currentTerm, 427 VoteGranted: false, 428 } 429 _, err = sendMsg(peer, msgAsRequestVoteResponse) 430 if nil != err { 431 panic(err) 432 } 433 } else if msgAsRequestVoteRequest.CandidateTerm == globals.currentTerm { 434 if nil != globals.currentLeader { 435 // Candidate missed Leader election, so vote no 436 msgAsRequestVoteResponse = &RequestVoteResponseStruct{ 437 MsgType: MsgTypeRequestVoteResponse, 438 MsgTag: msgAsRequestVoteRequest.MsgTag, 439 CurrentTerm: globals.currentTerm, 440 VoteGranted: false, 441 } 442 _, err = sendMsg(peer, msgAsRequestVoteResponse) 443 if nil != err { 444 panic(err) 445 } 446 } else { 447 if peer == globals.currentVote { 448 // Candidate we voted for missed our yes vote and we received msg twice, so vote yes again 449 msgAsRequestVoteResponse = &RequestVoteResponseStruct{ 450 MsgType: MsgTypeRequestVoteResponse, 451 MsgTag: msgAsRequestVoteRequest.MsgTag, 452 CurrentTerm: globals.currentTerm, 453 VoteGranted: true, 454 } 455 _, err = sendMsg(peer, msgAsRequestVoteResponse) 456 if nil != err { 457 panic(err) 458 } 459 // Reset heartBeatMissTime 460 heartbeatMissTime = time.Now().Add(globals.heartbeatMissDuration) 461 } else { // peer != globals.currentVote 462 // We voted for someone else or didn't vote, so vote no 463 msgAsRequestVoteResponse = &RequestVoteResponseStruct{ 464 MsgType: MsgTypeRequestVoteResponse, 465 MsgTag: msgAsRequestVoteRequest.MsgTag, 466 CurrentTerm: globals.currentTerm, 467 VoteGranted: false, 468 } 469 _, err = sendMsg(peer, msgAsRequestVoteResponse) 470 if nil != err { 471 panic(err) 472 } 473 } 474 } 475 } else { // msgAsRequestVoteRequest.CandidateTerm > globals.currentTerm 476 globals.currentTerm = msgAsRequestVoteRequest.CandidateTerm 477 // Vote yes 478 globals.currentLeader = nil 479 globals.currentVote = peer 480 msgAsRequestVoteResponse = &RequestVoteResponseStruct{ 481 MsgType: MsgTypeRequestVoteResponse, 482 MsgTag: msgAsRequestVoteRequest.MsgTag, 483 CurrentTerm: globals.currentTerm, 484 VoteGranted: true, 485 } 486 _, err = sendMsg(peer, msgAsRequestVoteResponse) 487 if nil != err { 488 panic(err) 489 } 490 // Reset heartBeatMissTime 491 heartbeatMissTime = time.Now().Add(globals.heartbeatMissDuration) 492 } 493 case MsgTypeRequestVoteResponse: 494 msgAsRequestVoteResponse = recvMsgQueueElement.msg.(*RequestVoteResponseStruct) 495 if msgAsRequestVoteResponse.CurrentTerm < globals.currentTerm { 496 // Ignore it 497 } else if msgAsRequestVoteResponse.CurrentTerm == globals.currentTerm { 498 // Ignore it 499 } else { // msgAsRequestVoteResponse.CurrentTerm > globals.currentTerm 500 globals.currentTerm = msgAsRequestVoteResponse.CurrentTerm 501 } 502 case MsgTypeFetchLivenessReportRequest: 503 msgAsFetchLivenessReportRequest = recvMsgQueueElement.msg.(*FetchLivenessReportRequestStruct) 504 if msgAsFetchLivenessReportRequest.CurrentTerm < globals.currentTerm { 505 // Ignore it 506 } else if msgAsFetchLivenessReportRequest.CurrentTerm == globals.currentTerm { 507 // Unexpected... inform requestor of the actual Leader 508 msgAsFetchLivenessReportResponse = &FetchLivenessReportResponseStruct{ 509 MsgType: MsgTypeFetchLivenessReportResponse, 510 MsgTag: msgAsFetchLivenessReportRequest.MsgTag, 511 CurrentTerm: globals.currentTerm, 512 CurrentLeader: globals.currentLeader.udpAddr.String(), 513 Success: false, 514 LivenessReport: nil, 515 } 516 _, err = sendMsg(peer, msgAsRequestVoteResponse) 517 if nil != err { 518 panic(err) 519 } 520 } else { // msgAsFetchLivenessReportRequest.CurrentTerm > globals.currentTerm 521 globals.currentTerm = msgAsFetchLivenessReportRequest.CurrentTerm 522 msgAsFetchLivenessReportResponse = &FetchLivenessReportResponseStruct{ 523 MsgType: MsgTypeFetchLivenessReportResponse, 524 MsgTag: msgAsFetchLivenessReportRequest.MsgTag, 525 CurrentTerm: globals.currentTerm, 526 CurrentLeader: globals.currentLeader.udpAddr.String(), 527 Success: false, 528 LivenessReport: nil, 529 } 530 _, err = sendMsg(peer, msgAsRequestVoteResponse) 531 if nil != err { 532 panic(err) 533 } 534 } 535 case MsgTypeFetchLivenessReportResponse: 536 msgAsFetchLivenessReportResponse = recvMsgQueueElement.msg.(*FetchLivenessReportResponseStruct) 537 deliverResponse(recvMsgQueueElement.msgTag, msgAsFetchLivenessReportResponse) 538 default: 539 err = fmt.Errorf("Unexpected recvMsgQueueElement.msg: %v", reflect.TypeOf(recvMsgQueueElement.msg)) 540 panic(err) 541 } 542 } 543 case <-time.After(heartbeatMissDurationRemaining): 544 globals.nextState = doCandidate 545 return 546 } 547 } 548 } 549 550 func doLeader() { 551 var ( 552 awaitingResponses map[*peerStruct]struct{} 553 err error 554 heartbeatDurationRemaining time.Duration 555 heartbeatMsgTag uint64 556 heartbeatSendTime time.Time 557 heartbeatSuccessfulResponses uint64 558 heartbeatSuccessfulResponsesRequiredForQuorum uint64 559 livenessReportThisHeartBeat *internalLivenessReportStruct 560 maxDiskUsagePercentage uint8 561 msgAsFetchLivenessReportRequest *FetchLivenessReportRequestStruct 562 msgAsFetchLivenessReportResponse *FetchLivenessReportResponseStruct 563 msgAsHeartBeatRequest *HeartBeatRequestStruct 564 msgAsHeartBeatResponse *HeartBeatResponseStruct 565 msgAsRequestVoteRequest *RequestVoteRequestStruct 566 msgAsRequestVoteResponse *RequestVoteResponseStruct 567 ok bool 568 peer *peerStruct 569 observingPeerReport *internalObservingPeerReportStruct 570 quorumMembersLastHeartBeat []string 571 quorumMembersThisHeartBeat []string 572 reconEndpointReport *internalReconEndpointReportStruct 573 recvMsgQueueElement *recvMsgQueueElementStruct 574 timeNow time.Time 575 ) 576 577 if LogLevelStateChanges <= globals.logLevel { 578 logger.Infof("%s entered Leader state", globals.myUDPAddr) 579 } 580 581 heartbeatSendTime = time.Now() // Force first time through for{} loop to send a heartbeat 582 583 quorumMembersThisHeartBeat = []string{globals.whoAmI} 584 585 globals.Lock() 586 globals.myObservingPeerReport = &internalObservingPeerReportStruct{ 587 name: globals.whoAmI, 588 servingPeer: make(map[string]*internalServingPeerReportStruct), 589 reconEndpoint: make(map[string]*internalReconEndpointReportStruct), 590 } 591 globals.Unlock() 592 globals.livenessCheckerControlChan <- true 593 594 livenessReportThisHeartBeat = &internalLivenessReportStruct{ 595 observingPeer: make(map[string]*internalObservingPeerReportStruct), 596 } 597 598 for { 599 timeNow = time.Now() 600 601 if timeNow.Before(heartbeatSendTime) { 602 heartbeatDurationRemaining = heartbeatSendTime.Sub(timeNow) 603 } else { 604 globals.Lock() 605 606 mergeObservingPeerReportIntoLivenessReport(globals.myObservingPeerReport, livenessReportThisHeartBeat) 607 608 globals.livenessReport = livenessReportThisHeartBeat 609 610 quorumMembersLastHeartBeat = make([]string, len(quorumMembersThisHeartBeat)) 611 _ = copy(quorumMembersLastHeartBeat, quorumMembersThisHeartBeat) 612 613 livenessReportThisHeartBeat = computeLivenessCheckAssignments(quorumMembersLastHeartBeat) 614 615 updateMyObservingPeerReportWhileLocked(livenessReportThisHeartBeat.observingPeer[globals.whoAmI]) 616 617 globals.Unlock() 618 619 quorumMembersThisHeartBeat = make([]string, 1, 1+len(globals.peersByName)) 620 quorumMembersThisHeartBeat[0] = globals.whoAmI 621 622 heartbeatMsgTag = fetchNonce() 623 624 awaitingResponses = make(map[*peerStruct]struct{}) 625 626 for _, peer = range globals.peersByName { 627 msgAsHeartBeatRequest = &HeartBeatRequestStruct{ 628 MsgType: MsgTypeHeartBeatRequest, 629 MsgTag: heartbeatMsgTag, 630 LeaderTerm: globals.currentTerm, 631 NewRWMode: globals.curRWMode, 632 ToObserve: convertInternalToExternalObservingPeerReport(livenessReportThisHeartBeat.observingPeer[peer.name]), 633 } 634 635 _, err = sendMsg(peer, msgAsHeartBeatRequest) 636 if nil != err { 637 panic(err) 638 } 639 640 awaitingResponses[peer] = struct{}{} 641 } 642 643 heartbeatSendTime = timeNow.Add(globals.heartbeatDuration) 644 heartbeatDurationRemaining = globals.heartbeatDuration 645 646 heartbeatSuccessfulResponsesRequiredForQuorum = (uint64(len(awaitingResponses)) + 1) / 2 647 heartbeatSuccessfulResponses = 0 648 } 649 650 select { 651 case <-globals.stateMachineStopChan: 652 globals.stateMachineDone.Done() 653 runtime.Goexit() 654 case <-globals.recvMsgChan: 655 recvMsgQueueElement = popGlobalMsg() 656 if nil != recvMsgQueueElement { 657 peer = recvMsgQueueElement.peer 658 switch recvMsgQueueElement.msgType { 659 case MsgTypeHeartBeatRequest: 660 msgAsHeartBeatRequest = recvMsgQueueElement.msg.(*HeartBeatRequestStruct) 661 if msgAsHeartBeatRequest.LeaderTerm < globals.currentTerm { 662 // Ignore it 663 } else if msgAsHeartBeatRequest.LeaderTerm == globals.currentTerm { 664 // Unexpected... so convert to Candidate state 665 msgAsHeartBeatResponse = &HeartBeatResponseStruct{ 666 MsgType: MsgTypeHeartBeatResponse, 667 MsgTag: msgAsHeartBeatRequest.MsgTag, 668 CurrentTerm: globals.currentTerm, 669 Success: false, 670 } 671 _, err = sendMsg(peer, msgAsHeartBeatResponse) 672 if nil != err { 673 panic(err) 674 } 675 globals.nextState = doCandidate 676 return 677 } else { // msgAsHeartBeatRequest.LeaderTerm > globals.currentTerm 678 globals.currentTerm = msgAsHeartBeatRequest.LeaderTerm 679 // We missed a subsequent election, so convert to Follower state 680 globals.currentLeader = peer 681 msgAsHeartBeatResponse = &HeartBeatResponseStruct{ 682 MsgType: MsgTypeHeartBeatResponse, 683 MsgTag: msgAsHeartBeatRequest.MsgTag, 684 CurrentTerm: globals.currentTerm, 685 Success: true, 686 } 687 _, err = sendMsg(peer, msgAsHeartBeatResponse) 688 if nil != err { 689 panic(err) 690 } 691 globals.nextState = doFollower 692 return 693 } 694 case MsgTypeHeartBeatResponse: 695 msgAsHeartBeatResponse = recvMsgQueueElement.msg.(*HeartBeatResponseStruct) 696 if msgAsHeartBeatResponse.CurrentTerm < globals.currentTerm { 697 // Ignore it 698 } else if msgAsHeartBeatResponse.CurrentTerm == globals.currentTerm { 699 if heartbeatMsgTag == msgAsHeartBeatResponse.MsgTag { 700 _, ok = awaitingResponses[peer] 701 if ok { 702 delete(awaitingResponses, peer) 703 if msgAsHeartBeatResponse.Success { 704 heartbeatSuccessfulResponses++ 705 quorumMembersThisHeartBeat = append(quorumMembersThisHeartBeat, peer.name) 706 if nil != msgAsHeartBeatResponse.Observed { 707 observingPeerReport = convertExternalToInternalObservingPeerReport(msgAsHeartBeatResponse.Observed) 708 if nil != observingPeerReport { 709 mergeObservingPeerReportIntoLivenessReport(observingPeerReport, livenessReportThisHeartBeat) 710 } 711 } 712 } else { 713 // Unexpected... so convert to Follower state 714 globals.nextState = doFollower 715 return 716 } 717 } 718 } else { 719 // Ignore it 720 } 721 } else { // msgAsHeartBeatResponse.CurrentTerm > globals.currentTerm 722 globals.currentTerm = msgAsHeartBeatResponse.CurrentTerm 723 // Convert to Follower state 724 globals.nextState = doFollower 725 return 726 } 727 case MsgTypeRequestVoteRequest: 728 msgAsRequestVoteRequest = recvMsgQueueElement.msg.(*RequestVoteRequestStruct) 729 if msgAsRequestVoteRequest.CandidateTerm < globals.currentTerm { 730 // Ignore it 731 } else if msgAsRequestVoteRequest.CandidateTerm == globals.currentTerm { 732 // Ignore it 733 } else { // msgAsRequestVoteRequest.CandidateTerm > globals.currentTerm 734 globals.currentTerm = msgAsRequestVoteRequest.CandidateTerm 735 // Abandon our Leadership, vote yes, and convert to Follower 736 globals.currentLeader = nil 737 globals.currentVote = peer 738 msgAsRequestVoteResponse = &RequestVoteResponseStruct{ 739 MsgType: MsgTypeRequestVoteResponse, 740 MsgTag: msgAsRequestVoteRequest.MsgTag, 741 CurrentTerm: globals.currentTerm, 742 VoteGranted: true, 743 } 744 _, err = sendMsg(peer, msgAsRequestVoteResponse) 745 if nil != err { 746 panic(err) 747 } 748 globals.nextState = doFollower 749 return 750 } 751 case MsgTypeRequestVoteResponse: 752 msgAsRequestVoteResponse = recvMsgQueueElement.msg.(*RequestVoteResponseStruct) 753 if msgAsRequestVoteResponse.CurrentTerm < globals.currentTerm { 754 // Ignore it 755 } else if msgAsRequestVoteResponse.CurrentTerm == globals.currentTerm { 756 // Ignore it 757 } else { // msgAsRequestVoteResponse.CurrentTerm > globals.currentTerm 758 globals.currentTerm = msgAsRequestVoteResponse.CurrentTerm 759 // Unexpected... so convert to Follower state 760 globals.nextState = doFollower 761 return 762 } 763 case MsgTypeFetchLivenessReportRequest: 764 msgAsFetchLivenessReportRequest = recvMsgQueueElement.msg.(*FetchLivenessReportRequestStruct) 765 if msgAsFetchLivenessReportRequest.CurrentTerm < globals.currentTerm { 766 // Ignore it 767 } else if msgAsFetchLivenessReportRequest.CurrentTerm == globals.currentTerm { 768 msgAsFetchLivenessReportResponse = &FetchLivenessReportResponseStruct{ 769 MsgType: MsgTypeFetchLivenessReportResponse, 770 MsgTag: msgAsFetchLivenessReportRequest.MsgTag, 771 CurrentTerm: globals.currentTerm, 772 CurrentLeader: globals.myUDPAddr.String(), 773 } 774 msgAsFetchLivenessReportResponse.LivenessReport = convertInternalToExternalLivenessReport(globals.livenessReport) 775 msgAsFetchLivenessReportResponse.Success = (nil != msgAsFetchLivenessReportResponse.LivenessReport) 776 _, err = sendMsg(peer, msgAsFetchLivenessReportResponse) 777 if nil != err { 778 panic(err) 779 } 780 } else { // msgAsFetchLivenessReportRequest.CurrentTerm > globals.currentTerm 781 globals.currentTerm = msgAsFetchLivenessReportRequest.CurrentTerm 782 // Unexpected... reject it and convert to Follower state 783 msgAsFetchLivenessReportResponse = &FetchLivenessReportResponseStruct{ 784 MsgType: MsgTypeFetchLivenessReportResponse, 785 MsgTag: msgAsFetchLivenessReportRequest.MsgTag, 786 CurrentTerm: globals.currentTerm, 787 CurrentLeader: "", 788 Success: false, 789 LivenessReport: nil, 790 } 791 _, err = sendMsg(peer, msgAsRequestVoteResponse) 792 if nil != err { 793 panic(err) 794 } 795 globals.nextState = doFollower 796 return 797 } 798 case MsgTypeFetchLivenessReportResponse: 799 msgAsFetchLivenessReportResponse = recvMsgQueueElement.msg.(*FetchLivenessReportResponseStruct) 800 if msgAsFetchLivenessReportResponse.CurrentTerm < globals.currentTerm { 801 // Ignore it 802 } else if msgAsFetchLivenessReportResponse.CurrentTerm == globals.currentTerm { 803 // Unexpected... so convert to Follower state 804 globals.nextState = doFollower 805 return 806 } else { // msgAsFetchLivenessReportResponse.CurrentTerm > globals.currentTerm 807 globals.currentTerm = msgAsHeartBeatResponse.CurrentTerm 808 // Convert to Follower state 809 globals.nextState = doFollower 810 return 811 } 812 default: 813 err = fmt.Errorf("Unexpected recvMsgQueueElement.msg: %v", reflect.TypeOf(recvMsgQueueElement.msg)) 814 panic(err) 815 } 816 } 817 case <-time.After(heartbeatDurationRemaining): 818 if heartbeatSuccessfulResponses >= heartbeatSuccessfulResponsesRequiredForQuorum { 819 // Compute new RWMode 820 821 maxDiskUsagePercentage = 0 822 823 for _, observingPeerReport = range globals.livenessReport.observingPeer { 824 for _, reconEndpointReport = range observingPeerReport.reconEndpoint { 825 if reconEndpointReport.maxDiskUsagePercentage > maxDiskUsagePercentage { 826 maxDiskUsagePercentage = reconEndpointReport.maxDiskUsagePercentage 827 } 828 } 829 } 830 831 if maxDiskUsagePercentage >= globals.swiftReconReadOnlyThreshold { 832 globals.curRWMode = inode.RWModeReadOnly 833 } else if maxDiskUsagePercentage >= globals.swiftReconNoWriteThreshold { 834 globals.curRWMode = inode.RWModeNoWrite 835 } else { 836 globals.curRWMode = inode.RWModeNormal 837 } 838 839 err = inode.SetRWMode(globals.curRWMode) 840 if nil != err { 841 logger.FatalfWithError(err, "inode.SetRWMode(%d) failed", globals.curRWMode) 842 } 843 844 // Now just loop back and issue a fresh HeartBeat 845 } else { 846 // Quorum lost... convert to Candidate state 847 globals.nextState = doCandidate 848 return 849 } 850 } 851 } 852 }