github.com/kchristidis/fabric@v1.0.4-0.20171028114726-837acd08cde1/gossip/election/election.go (about) 1 /* 2 Copyright IBM Corp. 2016 All Rights Reserved. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package election 18 19 import ( 20 "bytes" 21 "sync" 22 "sync/atomic" 23 "time" 24 25 "github.com/hyperledger/fabric/gossip/util" 26 "github.com/op/go-logging" 27 "github.com/spf13/viper" 28 ) 29 30 // Gossip leader election module 31 // Algorithm properties: 32 // - Peers break symmetry by comparing IDs 33 // - Each peer is either a leader or a follower, 34 // and the aim is to have exactly 1 leader if the membership view 35 // is the same for all peers 36 // - If the network is partitioned into 2 or more sets, the number of leaders 37 // is the number of network partitions, but when the partition heals, 38 // only 1 leader should be left eventually 39 // - Peers communicate by gossiping leadership proposal or declaration messages 40 41 // The Algorithm, in pseudo code: 42 // 43 // 44 // variables: 45 // leaderKnown = false 46 // 47 // Invariant: 48 // Peer listens for messages from remote peers 49 // and whenever it receives a leadership declaration, 50 // leaderKnown is set to true 51 // 52 // Startup(): 53 // wait for membership view to stabilize, or until a leadership declaration is received 54 // or the startup timeout expires. 
55 // goto SteadyState() 56 // 57 // SteadyState(): 58 // while true: 59 // If leaderKnown is false: 60 // LeaderElection() 61 // If you are the leader: 62 // Broadcast leadership declaration 63 // If a leadership declaration was received from 64 // a peer with a lower ID, 65 // become a follower 66 // Else, you're a follower: 67 // If haven't received a leadership declaration within 68 // a time threshold: 69 // set leaderKnown to false 70 // 71 // LeaderElection(): 72 // Gossip leadership proposal message 73 // Collect messages from other peers sent within a time period 74 // If received a leadership declaration: 75 // return 76 // Iterate over all proposal messages collected. 77 // If a proposal message from a peer with an ID lower 78 // than yourself was received, return. 79 // Else, declare yourself a leader 80 81 // LeaderElectionAdapter is used by the leader election module 82 // to send and receive messages and to get membership information 83 type LeaderElectionAdapter interface { 84 // Gossip gossips a message to other peers 85 Gossip(Msg) 86 87 // Accept returns a channel that emits messages 88 Accept() <-chan Msg 89 90 // CreateProposalMessage 91 CreateMessage(isDeclaration bool) Msg 92 93 // Peers returns a list of peers considered alive 94 Peers() []Peer 95 } 96 97 type leadershipCallback func(isLeader bool) 98 99 // LeaderElectionService is the object that runs the leader election algorithm 100 type LeaderElectionService interface { 101 // IsLeader returns whether this peer is a leader or not 102 IsLeader() bool 103 104 // Stop stops the LeaderElectionService 105 Stop() 106 107 // Yield relinquishes the leadership until a new leader is elected, 108 // or a timeout expires 109 Yield() 110 } 111 112 type peerID []byte 113 114 // Peer describes a remote peer 115 type Peer interface { 116 // ID returns the ID of the peer 117 ID() peerID 118 } 119 120 // Msg describes a message sent from a remote peer 121 type Msg interface { 122 // SenderID returns the 
ID of the peer sent the message 123 SenderID() peerID 124 // IsProposal returns whether this message is a leadership proposal 125 IsProposal() bool 126 // IsDeclaration returns whether this message is a leadership declaration 127 IsDeclaration() bool 128 } 129 130 func noopCallback(_ bool) { 131 } 132 133 // NewLeaderElectionService returns a new LeaderElectionService 134 func NewLeaderElectionService(adapter LeaderElectionAdapter, id string, callback leadershipCallback) LeaderElectionService { 135 if len(id) == 0 { 136 panic("Empty id") 137 } 138 le := &leaderElectionSvcImpl{ 139 id: peerID(id), 140 proposals: util.NewSet(), 141 adapter: adapter, 142 stopChan: make(chan struct{}, 1), 143 interruptChan: make(chan struct{}, 1), 144 logger: util.GetLogger(util.LoggingElectionModule, ""), 145 callback: noopCallback, 146 } 147 148 if callback != nil { 149 le.callback = callback 150 } 151 152 go le.start() 153 return le 154 } 155 156 // leaderElectionSvcImpl is an implementation of a LeaderElectionService 157 type leaderElectionSvcImpl struct { 158 id peerID 159 proposals *util.Set 160 sync.Mutex 161 stopChan chan struct{} 162 interruptChan chan struct{} 163 stopWG sync.WaitGroup 164 isLeader int32 165 toDie int32 166 leaderExists int32 167 yield int32 168 sleeping bool 169 adapter LeaderElectionAdapter 170 logger *logging.Logger 171 callback leadershipCallback 172 yieldTimer *time.Timer 173 } 174 175 func (le *leaderElectionSvcImpl) start() { 176 le.stopWG.Add(2) 177 go le.handleMessages() 178 le.waitForMembershipStabilization(getStartupGracePeriod()) 179 go le.run() 180 } 181 182 func (le *leaderElectionSvcImpl) handleMessages() { 183 le.logger.Debug(le.id, ": Entering") 184 defer le.logger.Debug(le.id, ": Exiting") 185 defer le.stopWG.Done() 186 msgChan := le.adapter.Accept() 187 for { 188 select { 189 case <-le.stopChan: 190 le.stopChan <- struct{}{} 191 return 192 case msg := <-msgChan: 193 if !le.isAlive(msg.SenderID()) { 194 le.logger.Debug(le.id, ": Got message 
from", msg.SenderID(), "but it is not in the view") 195 break 196 } 197 le.handleMessage(msg) 198 } 199 } 200 } 201 202 func (le *leaderElectionSvcImpl) handleMessage(msg Msg) { 203 msgType := "proposal" 204 if msg.IsDeclaration() { 205 msgType = "declaration" 206 } 207 le.logger.Debug(le.id, ":", msg.SenderID(), "sent us", msgType) 208 le.Lock() 209 defer le.Unlock() 210 211 if msg.IsProposal() { 212 le.proposals.Add(string(msg.SenderID())) 213 } else if msg.IsDeclaration() { 214 atomic.StoreInt32(&le.leaderExists, int32(1)) 215 if le.sleeping && len(le.interruptChan) == 0 { 216 le.interruptChan <- struct{}{} 217 } 218 if bytes.Compare(msg.SenderID(), le.id) < 0 && le.IsLeader() { 219 le.stopBeingLeader() 220 } 221 } else { 222 // We shouldn't get here 223 le.logger.Error("Got a message that's not a proposal and not a declaration") 224 } 225 } 226 227 // waitForInterrupt sleeps until the interrupt channel is triggered 228 // or given timeout expires 229 func (le *leaderElectionSvcImpl) waitForInterrupt(timeout time.Duration) { 230 le.logger.Debug(le.id, ": Entering") 231 defer le.logger.Debug(le.id, ": Exiting") 232 le.Lock() 233 le.sleeping = true 234 le.Unlock() 235 236 select { 237 case <-le.interruptChan: 238 case <-le.stopChan: 239 le.stopChan <- struct{}{} 240 case <-time.After(timeout): 241 } 242 243 le.Lock() 244 le.sleeping = false 245 // We drain the interrupt channel 246 // because we might get 2 leadership declarations messages 247 // while sleeping, but we would only read 1 of them in the select block above 248 le.drainInterruptChannel() 249 le.Unlock() 250 } 251 252 func (le *leaderElectionSvcImpl) run() { 253 defer le.stopWG.Done() 254 for !le.shouldStop() { 255 if !le.isLeaderExists() { 256 le.leaderElection() 257 } 258 // If we are yielding and some leader has been elected, 259 // stop yielding 260 if le.isLeaderExists() && le.isYielding() { 261 le.stopYielding() 262 } 263 if le.shouldStop() { 264 return 265 } 266 if le.IsLeader() { 267 
le.leader() 268 } else { 269 le.follower() 270 } 271 } 272 } 273 274 func (le *leaderElectionSvcImpl) leaderElection() { 275 le.logger.Debug(le.id, ": Entering") 276 defer le.logger.Debug(le.id, ": Exiting") 277 // If we're yielding to other peers, do not participate 278 // in leader election 279 if le.isYielding() { 280 return 281 } 282 // Propose ourselves as a leader 283 le.propose() 284 // Collect other proposals 285 le.waitForInterrupt(getLeaderElectionDuration()) 286 // If someone declared itself as a leader, give up 287 // on trying to become a leader too 288 if le.isLeaderExists() { 289 le.logger.Debug(le.id, ": Some peer is already a leader") 290 return 291 } 292 293 if le.isYielding() { 294 le.logger.Debug(le.id, ": Aborting leader election because yielding") 295 return 296 } 297 // Leader doesn't exist, let's see if there is a better candidate than us 298 // for being a leader 299 for _, o := range le.proposals.ToArray() { 300 id := o.(string) 301 if bytes.Compare(peerID(id), le.id) < 0 { 302 return 303 } 304 } 305 // If we got here, there is no one that proposed being a leader 306 // that's a better candidate than us. 
307 le.beLeader() 308 atomic.StoreInt32(&le.leaderExists, int32(1)) 309 } 310 311 // propose sends a leadership proposal message to remote peers 312 func (le *leaderElectionSvcImpl) propose() { 313 le.logger.Debug(le.id, ": Entering") 314 le.logger.Debug(le.id, ": Exiting") 315 leadershipProposal := le.adapter.CreateMessage(false) 316 le.adapter.Gossip(leadershipProposal) 317 } 318 319 func (le *leaderElectionSvcImpl) follower() { 320 le.logger.Debug(le.id, ": Entering") 321 defer le.logger.Debug(le.id, ": Exiting") 322 323 le.proposals.Clear() 324 atomic.StoreInt32(&le.leaderExists, int32(0)) 325 select { 326 case <-time.After(getLeaderAliveThreshold()): 327 case <-le.stopChan: 328 le.stopChan <- struct{}{} 329 } 330 } 331 332 func (le *leaderElectionSvcImpl) leader() { 333 leaderDeclaration := le.adapter.CreateMessage(true) 334 le.adapter.Gossip(leaderDeclaration) 335 le.waitForInterrupt(getLeadershipDeclarationInterval()) 336 } 337 338 // waitForMembershipStabilization waits for membership view to stabilize 339 // or until a time limit expires, or until a peer declares itself as a leader 340 func (le *leaderElectionSvcImpl) waitForMembershipStabilization(timeLimit time.Duration) { 341 le.logger.Debug(le.id, ": Entering") 342 defer le.logger.Debug(le.id, ": Exiting, peers found", len(le.adapter.Peers())) 343 endTime := time.Now().Add(timeLimit) 344 viewSize := len(le.adapter.Peers()) 345 for !le.shouldStop() { 346 time.Sleep(getMembershipSampleInterval()) 347 newSize := len(le.adapter.Peers()) 348 if newSize == viewSize || time.Now().After(endTime) || le.isLeaderExists() { 349 return 350 } 351 viewSize = newSize 352 } 353 } 354 355 // drainInterruptChannel clears the interruptChannel 356 // if needed 357 func (le *leaderElectionSvcImpl) drainInterruptChannel() { 358 if len(le.interruptChan) == 1 { 359 <-le.interruptChan 360 } 361 } 362 363 // isAlive returns whether peer of given id is considered alive 364 func (le *leaderElectionSvcImpl) isAlive(id peerID) bool 
{ 365 for _, p := range le.adapter.Peers() { 366 if bytes.Equal(p.ID(), id) { 367 return true 368 } 369 } 370 return false 371 } 372 373 func (le *leaderElectionSvcImpl) isLeaderExists() bool { 374 return atomic.LoadInt32(&le.leaderExists) == int32(1) 375 } 376 377 // IsLeader returns whether this peer is a leader 378 func (le *leaderElectionSvcImpl) IsLeader() bool { 379 isLeader := atomic.LoadInt32(&le.isLeader) == int32(1) 380 le.logger.Debug(le.id, ": Returning", isLeader) 381 return isLeader 382 } 383 384 func (le *leaderElectionSvcImpl) beLeader() { 385 le.logger.Debug(le.id, ": Becoming a leader") 386 atomic.StoreInt32(&le.isLeader, int32(1)) 387 le.callback(true) 388 } 389 390 func (le *leaderElectionSvcImpl) stopBeingLeader() { 391 le.logger.Debug(le.id, "Stopped being a leader") 392 atomic.StoreInt32(&le.isLeader, int32(0)) 393 le.callback(false) 394 } 395 396 func (le *leaderElectionSvcImpl) shouldStop() bool { 397 return atomic.LoadInt32(&le.toDie) == int32(1) 398 } 399 400 func (le *leaderElectionSvcImpl) isYielding() bool { 401 return atomic.LoadInt32(&le.yield) == int32(1) 402 } 403 404 func (le *leaderElectionSvcImpl) stopYielding() { 405 le.logger.Debug("Stopped yielding") 406 le.Lock() 407 defer le.Unlock() 408 atomic.StoreInt32(&le.yield, int32(0)) 409 le.yieldTimer.Stop() 410 } 411 412 // Yield relinquishes the leadership until a new leader is elected, 413 // or a timeout expires 414 func (le *leaderElectionSvcImpl) Yield() { 415 le.Lock() 416 defer le.Unlock() 417 if !le.IsLeader() || le.isYielding() { 418 return 419 } 420 // Turn on the yield flag 421 atomic.StoreInt32(&le.yield, int32(1)) 422 // Stop being a leader 423 le.stopBeingLeader() 424 // Clear the leader exists flag since it could be that we are the leader 425 atomic.StoreInt32(&le.leaderExists, int32(0)) 426 // Clear the yield flag in any case afterwards 427 le.yieldTimer = time.AfterFunc(getLeaderAliveThreshold()*6, func() { 428 atomic.StoreInt32(&le.yield, int32(0)) 429 }) 430 } 
431 432 // Stop stops the LeaderElectionService 433 func (le *leaderElectionSvcImpl) Stop() { 434 le.logger.Debug(le.id, ": Entering") 435 defer le.logger.Debug(le.id, ": Exiting") 436 atomic.StoreInt32(&le.toDie, int32(1)) 437 le.stopChan <- struct{}{} 438 le.stopWG.Wait() 439 } 440 441 // SetStartupGracePeriod configures startup grace period interval, 442 // the period of time to wait until election algorithm will start 443 func SetStartupGracePeriod(t time.Duration) { 444 viper.Set("peer.gossip.election.startupGracePeriod", t) 445 } 446 447 // SetMembershipSampleInterval setups/initializes the frequency the 448 // membership view should be checked 449 func SetMembershipSampleInterval(t time.Duration) { 450 viper.Set("peer.gossip.election.membershipSampleInterval", t) 451 } 452 453 // SetLeaderAliveThreshold configures leader election alive threshold 454 func SetLeaderAliveThreshold(t time.Duration) { 455 viper.Set("peer.gossip.election.leaderAliveThreshold", t) 456 } 457 458 // SetLeaderElectionDuration configures expected leadership election duration, 459 // interval to wait until leader election will be completed 460 func SetLeaderElectionDuration(t time.Duration) { 461 viper.Set("peer.gossip.election.leaderElectionDuration", t) 462 } 463 464 func getStartupGracePeriod() time.Duration { 465 return util.GetDurationOrDefault("peer.gossip.election.startupGracePeriod", time.Second*15) 466 } 467 468 func getMembershipSampleInterval() time.Duration { 469 return util.GetDurationOrDefault("peer.gossip.election.membershipSampleInterval", time.Second) 470 } 471 472 func getLeaderAliveThreshold() time.Duration { 473 return util.GetDurationOrDefault("peer.gossip.election.leaderAliveThreshold", time.Second*10) 474 } 475 476 func getLeadershipDeclarationInterval() time.Duration { 477 return time.Duration(getLeaderAliveThreshold() / 2) 478 } 479 480 func getLeaderElectionDuration() time.Duration { 481 return 
util.GetDurationOrDefault("peer.gossip.election.leaderElectionDuration", time.Second*5) 482 } 483 484 // GetMsgExpirationTimeout return leadership message expiration timeout 485 func GetMsgExpirationTimeout() time.Duration { 486 return getLeaderAliveThreshold() * 10 487 }