github.com/defanghe/fabric@v2.1.1+incompatible/gossip/election/election.go (about) 1 /* 2 Copyright IBM Corp. All Rights Reserved. 3 4 SPDX-License-Identifier: Apache-2.0 5 */ 6 7 package election 8 9 import ( 10 "bytes" 11 "encoding/hex" 12 "sync" 13 "sync/atomic" 14 "time" 15 16 "github.com/hyperledger/fabric/gossip/util" 17 ) 18 19 // Gossip leader election module 20 // Algorithm properties: 21 // - Peers break symmetry by comparing IDs 22 // - Each peer is either a leader or a follower, 23 // and the aim is to have exactly 1 leader if the membership view 24 // is the same for all peers 25 // - If the network is partitioned into 2 or more sets, the number of leaders 26 // is the number of network partitions, but when the partition heals, 27 // only 1 leader should be left eventually 28 // - Peers communicate by gossiping leadership proposal or declaration messages 29 30 // The Algorithm, in pseudo code: 31 // 32 // 33 // variables: 34 // leaderKnown = false 35 // 36 // Invariant: 37 // Peer listens for messages from remote peers 38 // and whenever it receives a leadership declaration, 39 // leaderKnown is set to true 40 // 41 // Startup(): 42 // wait for membership view to stabilize, or for a leadership declaration is received 43 // or the startup timeout expires. 44 // goto SteadyState() 45 // 46 // SteadyState(): 47 // while true: 48 // If leaderKnown is false: 49 // LeaderElection() 50 // If you are the leader: 51 // Broadcast leadership declaration 52 // If a leadership declaration was received from 53 // a peer with a lower ID, 54 // become a follower 55 // Else, you're a follower: 56 // If haven't received a leadership declaration within 57 // a time threshold: 58 // set leaderKnown to false 59 // 60 // LeaderElection(): 61 // Gossip leadership proposal message 62 // Collect messages from other peers sent within a time period 63 // If received a leadership declaration: 64 // return 65 // Iterate over all proposal messages collected. 66 // If a proposal message from a peer with an ID lower 67 // than yourself was received, return. 68 // Else, declare yourself a leader 69 70 // LeaderElectionAdapter is used by the leader election module 71 // to send and receive messages and to get membership information 72 type LeaderElectionAdapter interface { 73 // Gossip gossips a message to other peers 74 Gossip(Msg) 75 76 // Accept returns a channel that emits messages 77 Accept() <-chan Msg 78 79 // CreateProposalMessage 80 CreateMessage(isDeclaration bool) Msg 81 82 // Peers returns a list of peers considered alive 83 Peers() []Peer 84 85 // ReportMetrics sends a report to the metrics server about a leadership status 86 ReportMetrics(isLeader bool) 87 } 88 89 type leadershipCallback func(isLeader bool) 90 91 // LeaderElectionService is the object that runs the leader election algorithm 92 type LeaderElectionService interface { 93 // IsLeader returns whether this peer is a leader or not 94 IsLeader() bool 95 96 // Stop stops the LeaderElectionService 97 Stop() 98 99 // Yield relinquishes the leadership until a new leader is elected, 100 // or a timeout expires 101 Yield() 102 } 103 104 type peerID []byte 105 106 func (p peerID) String() string { 107 if p == nil { 108 return "<nil>" 109 } 110 return hex.EncodeToString(p) 111 } 112 113 // Peer describes a remote peer 114 type Peer interface { 115 // ID returns the ID of the peer 116 ID() peerID 117 } 118 119 // Msg describes a message sent from a remote peer 120 type Msg interface { 121 // SenderID returns the ID of the peer sent the message 122 SenderID() peerID 123 // IsProposal returns whether this message is a leadership proposal 124 IsProposal() bool 125 // IsDeclaration returns whether this message is a leadership declaration 126 IsDeclaration() bool 127 } 128 129 func noopCallback(_ bool) { 130 } 131 132 const ( 133 DefStartupGracePeriod = time.Second * 15 134 DefMembershipSampleInterval = time.Second 135 DefLeaderAliveThreshold = time.Second * 10 136 DefLeaderElectionDuration = time.Second * 5 137 ) 138 139 type ElectionConfig struct { 140 StartupGracePeriod time.Duration 141 MembershipSampleInterval time.Duration 142 LeaderAliveThreshold time.Duration 143 LeaderElectionDuration time.Duration 144 } 145 146 // NewLeaderElectionService returns a new LeaderElectionService 147 func NewLeaderElectionService(adapter LeaderElectionAdapter, id string, callback leadershipCallback, config ElectionConfig) LeaderElectionService { 148 if len(id) == 0 { 149 panic("Empty id") 150 } 151 le := &leaderElectionSvcImpl{ 152 id: peerID(id), 153 proposals: util.NewSet(), 154 adapter: adapter, 155 stopChan: make(chan struct{}), 156 interruptChan: make(chan struct{}, 1), 157 logger: util.GetLogger(util.ElectionLogger, ""), 158 callback: noopCallback, 159 config: config, 160 } 161 162 if callback != nil { 163 le.callback = callback 164 } 165 166 go le.start() 167 return le 168 } 169 170 // leaderElectionSvcImpl is an implementation of a LeaderElectionService 171 type leaderElectionSvcImpl struct { 172 id peerID 173 proposals *util.Set 174 sync.Mutex 175 stopChan chan struct{} 176 interruptChan chan struct{} 177 stopWG sync.WaitGroup 178 isLeader int32 179 leaderExists int32 180 yield int32 181 sleeping bool 182 adapter LeaderElectionAdapter 183 logger util.Logger 184 callback leadershipCallback 185 yieldTimer *time.Timer 186 config ElectionConfig 187 } 188 189 func (le *leaderElectionSvcImpl) start() { 190 le.stopWG.Add(2) 191 go le.handleMessages() 192 le.waitForMembershipStabilization(le.config.StartupGracePeriod) 193 go le.run() 194 } 195 196 func (le *leaderElectionSvcImpl) handleMessages() { 197 le.logger.Debug(le.id, ": Entering") 198 defer le.logger.Debug(le.id, ": Exiting") 199 defer le.stopWG.Done() 200 msgChan := le.adapter.Accept() 201 for { 202 select { 203 case <-le.stopChan: 204 return 205 case msg := <-msgChan: 206 if !le.isAlive(msg.SenderID()) { 207 le.logger.Debug(le.id, ": Got message from", msg.SenderID(), "but it is not in the view") 208 break 209 } 210 le.handleMessage(msg) 211 } 212 } 213 } 214 215 func (le *leaderElectionSvcImpl) handleMessage(msg Msg) { 216 msgType := "proposal" 217 if msg.IsDeclaration() { 218 msgType = "declaration" 219 } 220 le.logger.Debug(le.id, ":", msg.SenderID(), "sent us", msgType) 221 le.Lock() 222 defer le.Unlock() 223 224 if msg.IsProposal() { 225 le.proposals.Add(string(msg.SenderID())) 226 } else if msg.IsDeclaration() { 227 atomic.StoreInt32(&le.leaderExists, int32(1)) 228 if le.sleeping && len(le.interruptChan) == 0 { 229 le.interruptChan <- struct{}{} 230 } 231 if bytes.Compare(msg.SenderID(), le.id) < 0 && le.IsLeader() { 232 le.stopBeingLeader() 233 } 234 } else { 235 // We shouldn't get here 236 le.logger.Error("Got a message that's not a proposal and not a declaration") 237 } 238 } 239 240 // waitForInterrupt sleeps until the interrupt channel is triggered 241 // or given timeout expires 242 func (le *leaderElectionSvcImpl) waitForInterrupt(timeout time.Duration) { 243 le.logger.Debug(le.id, ": Entering") 244 defer le.logger.Debug(le.id, ": Exiting") 245 le.Lock() 246 le.sleeping = true 247 le.Unlock() 248 249 select { 250 case <-le.interruptChan: 251 case <-le.stopChan: 252 case <-time.After(timeout): 253 } 254 255 le.Lock() 256 le.sleeping = false 257 // We drain the interrupt channel 258 // because we might get 2 leadership declarations messages 259 // while sleeping, but we would only read 1 of them in the select block above 260 le.drainInterruptChannel() 261 le.Unlock() 262 } 263 264 func (le *leaderElectionSvcImpl) run() { 265 defer le.stopWG.Done() 266 for !le.shouldStop() { 267 if !le.isLeaderExists() { 268 le.leaderElection() 269 } 270 // If we are yielding and some leader has been elected, 271 // stop yielding 272 if le.isLeaderExists() && le.isYielding() { 273 le.stopYielding() 274 } 275 if le.shouldStop() { 276 return 277 } 278 if le.IsLeader() { 279 le.leader() 280 } else { 281 le.follower() 282 } 283 } 284 } 285 286 func (le *leaderElectionSvcImpl) leaderElection() { 287 le.logger.Debug(le.id, ": Entering") 288 defer le.logger.Debug(le.id, ": Exiting") 289 // If we're yielding to other peers, do not participate 290 // in leader election 291 if le.isYielding() { 292 return 293 } 294 // Propose ourselves as a leader 295 le.propose() 296 // Collect other proposals 297 le.waitForInterrupt(le.config.LeaderElectionDuration) 298 // If someone declared itself as a leader, give up 299 // on trying to become a leader too 300 if le.isLeaderExists() { 301 le.logger.Info(le.id, ": Some peer is already a leader") 302 return 303 } 304 305 if le.isYielding() { 306 le.logger.Debug(le.id, ": Aborting leader election because yielding") 307 return 308 } 309 // Leader doesn't exist, let's see if there is a better candidate than us 310 // for being a leader 311 for _, o := range le.proposals.ToArray() { 312 id := o.(string) 313 if bytes.Compare(peerID(id), le.id) < 0 { 314 return 315 } 316 } 317 // If we got here, there is no one that proposed being a leader 318 // that's a better candidate than us. 319 le.beLeader() 320 atomic.StoreInt32(&le.leaderExists, int32(1)) 321 } 322 323 // propose sends a leadership proposal message to remote peers 324 func (le *leaderElectionSvcImpl) propose() { 325 le.logger.Debug(le.id, ": Entering") 326 le.logger.Debug(le.id, ": Exiting") 327 leadershipProposal := le.adapter.CreateMessage(false) 328 le.adapter.Gossip(leadershipProposal) 329 } 330 331 func (le *leaderElectionSvcImpl) follower() { 332 le.logger.Debug(le.id, ": Entering") 333 defer le.logger.Debug(le.id, ": Exiting") 334 335 le.proposals.Clear() 336 atomic.StoreInt32(&le.leaderExists, int32(0)) 337 le.adapter.ReportMetrics(false) 338 select { 339 case <-time.After(le.config.LeaderAliveThreshold): 340 case <-le.stopChan: 341 } 342 } 343 344 func (le *leaderElectionSvcImpl) leader() { 345 leaderDeclaration := le.adapter.CreateMessage(true) 346 le.adapter.Gossip(leaderDeclaration) 347 le.adapter.ReportMetrics(true) 348 le.waitForInterrupt(le.config.LeaderAliveThreshold / 2) 349 } 350 351 // waitForMembershipStabilization waits for membership view to stabilize 352 // or until a time limit expires, or until a peer declares itself as a leader 353 func (le *leaderElectionSvcImpl) waitForMembershipStabilization(timeLimit time.Duration) { 354 le.logger.Debug(le.id, ": Entering") 355 defer le.logger.Debug(le.id, ": Exiting, peers found", len(le.adapter.Peers())) 356 endTime := time.Now().Add(timeLimit) 357 viewSize := len(le.adapter.Peers()) 358 for !le.shouldStop() { 359 time.Sleep(le.config.MembershipSampleInterval) 360 newSize := len(le.adapter.Peers()) 361 if newSize == viewSize || time.Now().After(endTime) || le.isLeaderExists() { 362 return 363 } 364 viewSize = newSize 365 } 366 } 367 368 // drainInterruptChannel clears the interruptChannel 369 // if needed 370 func (le *leaderElectionSvcImpl) drainInterruptChannel() { 371 if len(le.interruptChan) == 1 { 372 <-le.interruptChan 373 } 374 } 375 376 // isAlive returns whether peer of given id is considered alive 377 func (le *leaderElectionSvcImpl) isAlive(id peerID) bool { 378 for _, p := range le.adapter.Peers() { 379 if bytes.Equal(p.ID(), id) { 380 return true 381 } 382 } 383 return false 384 } 385 386 func (le *leaderElectionSvcImpl) isLeaderExists() bool { 387 return atomic.LoadInt32(&le.leaderExists) == int32(1) 388 } 389 390 // IsLeader returns whether this peer is a leader 391 func (le *leaderElectionSvcImpl) IsLeader() bool { 392 isLeader := atomic.LoadInt32(&le.isLeader) == int32(1) 393 le.logger.Debug(le.id, ": Returning", isLeader) 394 return isLeader 395 } 396 397 func (le *leaderElectionSvcImpl) beLeader() { 398 le.logger.Info(le.id, ": Becoming a leader") 399 atomic.StoreInt32(&le.isLeader, int32(1)) 400 le.callback(true) 401 } 402 403 func (le *leaderElectionSvcImpl) stopBeingLeader() { 404 le.logger.Info(le.id, "Stopped being a leader") 405 atomic.StoreInt32(&le.isLeader, int32(0)) 406 le.callback(false) 407 } 408 409 func (le *leaderElectionSvcImpl) shouldStop() bool { 410 select { 411 case <-le.stopChan: 412 return true 413 default: 414 return false 415 } 416 } 417 418 func (le *leaderElectionSvcImpl) isYielding() bool { 419 return atomic.LoadInt32(&le.yield) == int32(1) 420 } 421 422 func (le *leaderElectionSvcImpl) stopYielding() { 423 le.logger.Debug("Stopped yielding") 424 le.Lock() 425 defer le.Unlock() 426 atomic.StoreInt32(&le.yield, int32(0)) 427 le.yieldTimer.Stop() 428 } 429 430 // Yield relinquishes the leadership until a new leader is elected, 431 // or a timeout expires 432 func (le *leaderElectionSvcImpl) Yield() { 433 le.Lock() 434 defer le.Unlock() 435 if !le.IsLeader() || le.isYielding() { 436 return 437 } 438 // Turn on the yield flag 439 atomic.StoreInt32(&le.yield, int32(1)) 440 // Stop being a leader 441 le.stopBeingLeader() 442 // Clear the leader exists flag since it could be that we are the leader 443 atomic.StoreInt32(&le.leaderExists, int32(0)) 444 // Clear the yield flag in any case afterwards 445 le.yieldTimer = time.AfterFunc(le.config.LeaderAliveThreshold*6, func() { 446 atomic.StoreInt32(&le.yield, int32(0)) 447 }) 448 } 449 450 // Stop stops the LeaderElectionService 451 func (le *leaderElectionSvcImpl) Stop() { 452 select { 453 case <-le.stopChan: 454 default: 455 close(le.stopChan) 456 le.logger.Debug(le.id, ": Entering") 457 defer le.logger.Debug(le.id, ": Exiting") 458 le.stopWG.Wait() 459 } 460 }