github.com/darrenli6/fabric-sdk-example@v0.0.0-20220109053535-94b13b56df8c/gossip/election/election.go (about)

     1  /*
     2  Copyright IBM Corp. 2016 All Rights Reserved.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  		 http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package election
    18  
    19  import (
    20  	"bytes"
    21  	"sync"
    22  	"sync/atomic"
    23  	"time"
    24  
    25  	"github.com/hyperledger/fabric/gossip/util"
    26  	"github.com/op/go-logging"
    27  	"github.com/spf13/viper"
    28  )
    29  
    30  // Gossip leader election module
    31  // Algorithm properties:
    32  // - Peers break symmetry by comparing IDs
    33  // - Each peer is either a leader or a follower,
    34  //   and the aim is to have exactly 1 leader if the membership view
    35  //   is the same for all peers
    36  // - If the network is partitioned into 2 or more sets, the number of leaders
    37  //   is the number of network partitions, but when the partition heals,
    38  //   only 1 leader should be left eventually
    39  // - Peers communicate by gossiping leadership proposal or declaration messages
    40  
    41  // The Algorithm, in pseudo code:
    42  //
    43  //
    44  // variables:
    45  // 	leaderKnown = false
    46  //
    47  // Invariant:
    48  //	Peer listens for messages from remote peers
    49  //	and whenever it receives a leadership declaration,
    50  //	leaderKnown is set to true
    51  //
    52  // Startup():
    53  // 	wait for the membership view to stabilize, or until a leadership declaration
    54  //      is received, or until the startup timeout expires.
    55  //	goto SteadyState()
    56  //
    57  // SteadyState():
    58  // 	while true:
    59  //		If leaderKnown is false:
    60  // 			LeaderElection()
    61  //		If you are the leader:
    62  //			Broadcast leadership declaration
    63  //			If a leadership declaration was received from
    64  // 			a peer with a lower ID,
    65  //			become a follower
    66  //		Else, you're a follower:
    67  //			If haven't received a leadership declaration within
    68  // 			a time threshold:
    69  //				set leaderKnown to false
    70  //
    71  // LeaderElection():
    72  // 	Gossip leadership proposal message
    73  //	Collect messages from other peers sent within a time period
    74  //	If received a leadership declaration:
    75  //		return
    76  //	Iterate over all proposal messages collected.
    77  // 	If a proposal message from a peer with an ID lower
    78  // 	than yourself was received, return.
    79  //	Else, declare yourself a leader
    80  
// LeaderElectionAdapter is used by the leader election module
// to send and receive messages and to get membership information
type LeaderElectionAdapter interface {
	// Gossip gossips a message to other peers
	Gossip(Msg)

	// Accept returns a channel that emits messages
	Accept() <-chan Msg

	// CreateMessage creates a leadership message to be gossiped.
	// The election module passes isDeclaration=true when it builds
	// a leadership declaration and false when it builds a leadership proposal
	CreateMessage(isDeclaration bool) Msg

	// Peers returns a list of peers considered alive
	Peers() []Peer
}
    96  
// leadershipCallback is the function invoked by the election service
// on a leadership change: with true when this peer becomes a leader
// and with false when it stops being one
type leadershipCallback func(isLeader bool)
    98  
// LeaderElectionService is the object that runs the leader election algorithm
type LeaderElectionService interface {
	// IsLeader returns whether this peer is a leader or not
	IsLeader() bool

	// Stop stops the LeaderElectionService and waits for its
	// background goroutines to finish
	Stop()

	// Yield relinquishes the leadership until a new leader is elected,
	// or a timeout expires
	Yield()
}
   111  
// peerID is the raw identifier of a peer. The election algorithm
// orders IDs with bytes.Compare; the peer with the lower ID wins
type peerID []byte
   113  
// Peer describes a remote peer
type Peer interface {
	// ID returns the ID of the peer, which is matched against
	// message sender IDs to decide whether a sender is alive
	ID() peerID
}
   119  
// Msg describes a message sent from a remote peer
type Msg interface {
	// SenderID returns the ID of the peer that sent the message
	SenderID() peerID
	// IsProposal returns whether this message is a leadership proposal
	IsProposal() bool
	// IsDeclaration returns whether this message is a leadership declaration
	IsDeclaration() bool
}
   129  
   130  func noopCallback(_ bool) {
   131  }
   132  
   133  // NewLeaderElectionService returns a new LeaderElectionService
   134  func NewLeaderElectionService(adapter LeaderElectionAdapter, id string, callback leadershipCallback) LeaderElectionService {
   135  	if len(id) == 0 {
   136  		panic("Empty id")
   137  	}
   138  	le := &leaderElectionSvcImpl{
   139  		id:            peerID(id),
   140  		proposals:     util.NewSet(),
   141  		adapter:       adapter,
   142  		stopChan:      make(chan struct{}, 1),
   143  		interruptChan: make(chan struct{}, 1),
   144  		logger:        util.GetLogger(util.LoggingElectionModule, ""),
   145  		callback:      noopCallback,
   146  	}
   147  
   148  	if callback != nil {
   149  		le.callback = callback
   150  	}
   151  
   152  	go le.start()
   153  	return le
   154  }
   155  
   156  // leaderElectionSvcImpl is an implementation of a LeaderElectionService
   157  type leaderElectionSvcImpl struct {
   158  	id        peerID
   159  	proposals *util.Set
   160  	sync.Mutex
   161  	stopChan      chan struct{}
   162  	interruptChan chan struct{}
   163  	stopWG        sync.WaitGroup
   164  	isLeader      int32
   165  	toDie         int32
   166  	leaderExists  int32
   167  	yield         int32
   168  	sleeping      bool
   169  	adapter       LeaderElectionAdapter
   170  	logger        *logging.Logger
   171  	callback      leadershipCallback
   172  	yieldTimer    *time.Timer
   173  }
   174  
   175  func (le *leaderElectionSvcImpl) start() {
   176  	le.stopWG.Add(2)
   177  	go le.handleMessages()
   178  	le.waitForMembershipStabilization(getStartupGracePeriod())
   179  	go le.run()
   180  }
   181  
   182  func (le *leaderElectionSvcImpl) handleMessages() {
   183  	le.logger.Debug(le.id, ": Entering")
   184  	defer le.logger.Debug(le.id, ": Exiting")
   185  	defer le.stopWG.Done()
   186  	msgChan := le.adapter.Accept()
   187  	for {
   188  		select {
   189  		case <-le.stopChan:
   190  			le.stopChan <- struct{}{}
   191  			return
   192  		case msg := <-msgChan:
   193  			if !le.isAlive(msg.SenderID()) {
   194  				le.logger.Debug(le.id, ": Got message from", msg.SenderID(), "but it is not in the view")
   195  				break
   196  			}
   197  			le.handleMessage(msg)
   198  		}
   199  	}
   200  }
   201  
   202  func (le *leaderElectionSvcImpl) handleMessage(msg Msg) {
   203  	msgType := "proposal"
   204  	if msg.IsDeclaration() {
   205  		msgType = "declaration"
   206  	}
   207  	le.logger.Debug(le.id, ":", msg.SenderID(), "sent us", msgType)
   208  	le.Lock()
   209  	defer le.Unlock()
   210  
   211  	if msg.IsProposal() {
   212  		le.proposals.Add(string(msg.SenderID()))
   213  	} else if msg.IsDeclaration() {
   214  		atomic.StoreInt32(&le.leaderExists, int32(1))
   215  		if le.sleeping && len(le.interruptChan) == 0 {
   216  			le.interruptChan <- struct{}{}
   217  		}
   218  		if bytes.Compare(msg.SenderID(), le.id) < 0 && le.IsLeader() {
   219  			le.stopBeingLeader()
   220  		}
   221  	} else {
   222  		// We shouldn't get here
   223  		le.logger.Error("Got a message that's not a proposal and not a declaration")
   224  	}
   225  }
   226  
   227  // waitForInterrupt sleeps until the interrupt channel is triggered
   228  // or given timeout expires
   229  func (le *leaderElectionSvcImpl) waitForInterrupt(timeout time.Duration) {
   230  	le.logger.Debug(le.id, ": Entering")
   231  	defer le.logger.Debug(le.id, ": Exiting")
   232  	le.Lock()
   233  	le.sleeping = true
   234  	le.Unlock()
   235  
   236  	select {
   237  	case <-le.interruptChan:
   238  	case <-le.stopChan:
   239  		le.stopChan <- struct{}{}
   240  	case <-time.After(timeout):
   241  	}
   242  
   243  	le.Lock()
   244  	le.sleeping = false
   245  	// We drain the interrupt channel
   246  	// because we might get 2 leadership declarations messages
   247  	// while sleeping, but we would only read 1 of them in the select block above
   248  	le.drainInterruptChannel()
   249  	le.Unlock()
   250  }
   251  
   252  func (le *leaderElectionSvcImpl) run() {
   253  	defer le.stopWG.Done()
   254  	for !le.shouldStop() {
   255  		if !le.isLeaderExists() {
   256  			le.leaderElection()
   257  		}
   258  		// If we are yielding and some leader has been elected,
   259  		// stop yielding
   260  		if le.isLeaderExists() && le.isYielding() {
   261  			le.stopYielding()
   262  		}
   263  		if le.shouldStop() {
   264  			return
   265  		}
   266  		if le.IsLeader() {
   267  			le.leader()
   268  		} else {
   269  			le.follower()
   270  		}
   271  	}
   272  }
   273  
   274  func (le *leaderElectionSvcImpl) leaderElection() {
   275  	le.logger.Debug(le.id, ": Entering")
   276  	defer le.logger.Debug(le.id, ": Exiting")
   277  	// If we're yielding to other peers, do not participate
   278  	// in leader election
   279  	if le.isYielding() {
   280  		return
   281  	}
   282  	// Propose ourselves as a leader
   283  	le.propose()
   284  	// Collect other proposals
   285  	le.waitForInterrupt(getLeaderElectionDuration())
   286  	// If someone declared itself as a leader, give up
   287  	// on trying to become a leader too
   288  	if le.isLeaderExists() {
   289  		le.logger.Debug(le.id, ": Some peer is already a leader")
   290  		return
   291  	}
   292  
   293  	if le.isYielding() {
   294  		le.logger.Debug(le.id, ": Aborting leader election because yielding")
   295  		return
   296  	}
   297  	// Leader doesn't exist, let's see if there is a better candidate than us
   298  	// for being a leader
   299  	for _, o := range le.proposals.ToArray() {
   300  		id := o.(string)
   301  		if bytes.Compare(peerID(id), le.id) < 0 {
   302  			return
   303  		}
   304  	}
   305  	// If we got here, there is no one that proposed being a leader
   306  	// that's a better candidate than us.
   307  	le.beLeader()
   308  	atomic.StoreInt32(&le.leaderExists, int32(1))
   309  }
   310  
   311  // propose sends a leadership proposal message to remote peers
   312  func (le *leaderElectionSvcImpl) propose() {
   313  	le.logger.Debug(le.id, ": Entering")
   314  	le.logger.Debug(le.id, ": Exiting")
   315  	leadershipProposal := le.adapter.CreateMessage(false)
   316  	le.adapter.Gossip(leadershipProposal)
   317  }
   318  
   319  func (le *leaderElectionSvcImpl) follower() {
   320  	le.logger.Debug(le.id, ": Entering")
   321  	defer le.logger.Debug(le.id, ": Exiting")
   322  
   323  	le.proposals.Clear()
   324  	atomic.StoreInt32(&le.leaderExists, int32(0))
   325  	select {
   326  	case <-time.After(getLeaderAliveThreshold()):
   327  	case <-le.stopChan:
   328  		le.stopChan <- struct{}{}
   329  	}
   330  }
   331  
   332  func (le *leaderElectionSvcImpl) leader() {
   333  	leaderDeclaration := le.adapter.CreateMessage(true)
   334  	le.adapter.Gossip(leaderDeclaration)
   335  	le.waitForInterrupt(getLeadershipDeclarationInterval())
   336  }
   337  
   338  // waitForMembershipStabilization waits for membership view to stabilize
   339  // or until a time limit expires, or until a peer declares itself as a leader
   340  func (le *leaderElectionSvcImpl) waitForMembershipStabilization(timeLimit time.Duration) {
   341  	le.logger.Debug(le.id, ": Entering")
   342  	defer le.logger.Debug(le.id, ": Exiting, peers found", len(le.adapter.Peers()))
   343  	endTime := time.Now().Add(timeLimit)
   344  	viewSize := len(le.adapter.Peers())
   345  	for !le.shouldStop() {
   346  		time.Sleep(getMembershipSampleInterval())
   347  		newSize := len(le.adapter.Peers())
   348  		if newSize == viewSize || time.Now().After(endTime) || le.isLeaderExists() {
   349  			return
   350  		}
   351  		viewSize = newSize
   352  	}
   353  }
   354  
   355  // drainInterruptChannel clears the interruptChannel
   356  // if needed
   357  func (le *leaderElectionSvcImpl) drainInterruptChannel() {
   358  	if len(le.interruptChan) == 1 {
   359  		<-le.interruptChan
   360  	}
   361  }
   362  
   363  // isAlive returns whether peer of given id is considered alive
   364  func (le *leaderElectionSvcImpl) isAlive(id peerID) bool {
   365  	for _, p := range le.adapter.Peers() {
   366  		if bytes.Equal(p.ID(), id) {
   367  			return true
   368  		}
   369  	}
   370  	return false
   371  }
   372  
   373  func (le *leaderElectionSvcImpl) isLeaderExists() bool {
   374  	return atomic.LoadInt32(&le.leaderExists) == int32(1)
   375  }
   376  
   377  // IsLeader returns whether this peer is a leader
   378  func (le *leaderElectionSvcImpl) IsLeader() bool {
   379  	isLeader := atomic.LoadInt32(&le.isLeader) == int32(1)
   380  	le.logger.Debug(le.id, ": Returning", isLeader)
   381  	return isLeader
   382  }
   383  
   384  func (le *leaderElectionSvcImpl) beLeader() {
   385  	le.logger.Debug(le.id, ": Becoming a leader")
   386  	atomic.StoreInt32(&le.isLeader, int32(1))
   387  	le.callback(true)
   388  }
   389  
   390  func (le *leaderElectionSvcImpl) stopBeingLeader() {
   391  	le.logger.Debug(le.id, "Stopped being a leader")
   392  	atomic.StoreInt32(&le.isLeader, int32(0))
   393  	le.callback(false)
   394  }
   395  
   396  func (le *leaderElectionSvcImpl) shouldStop() bool {
   397  	return atomic.LoadInt32(&le.toDie) == int32(1)
   398  }
   399  
   400  func (le *leaderElectionSvcImpl) isYielding() bool {
   401  	return atomic.LoadInt32(&le.yield) == int32(1)
   402  }
   403  
   404  func (le *leaderElectionSvcImpl) stopYielding() {
   405  	le.logger.Debug("Stopped yielding")
   406  	le.Lock()
   407  	defer le.Unlock()
   408  	atomic.StoreInt32(&le.yield, int32(0))
   409  	le.yieldTimer.Stop()
   410  }
   411  
   412  // Yield relinquishes the leadership until a new leader is elected,
   413  // or a timeout expires
   414  func (le *leaderElectionSvcImpl) Yield() {
   415  	le.Lock()
   416  	defer le.Unlock()
   417  	if !le.IsLeader() || le.isYielding() {
   418  		return
   419  	}
   420  	// Turn on the yield flag
   421  	atomic.StoreInt32(&le.yield, int32(1))
   422  	// Stop being a leader
   423  	le.stopBeingLeader()
   424  	// Clear the leader exists flag since it could be that we are the leader
   425  	atomic.StoreInt32(&le.leaderExists, int32(0))
   426  	// Clear the yield flag in any case afterwards
   427  	le.yieldTimer = time.AfterFunc(getLeaderAliveThreshold()*6, func() {
   428  		atomic.StoreInt32(&le.yield, int32(0))
   429  	})
   430  }
   431  
   432  // Stop stops the LeaderElectionService
   433  func (le *leaderElectionSvcImpl) Stop() {
   434  	le.logger.Debug(le.id, ": Entering")
   435  	defer le.logger.Debug(le.id, ": Exiting")
   436  	atomic.StoreInt32(&le.toDie, int32(1))
   437  	le.stopChan <- struct{}{}
   438  	le.stopWG.Wait()
   439  }
   440  
   441  // SetStartupGracePeriod configures startup grace period interval,
   442  // the period of time to wait until election algorithm will start
   443  func SetStartupGracePeriod(t time.Duration) {
   444  	viper.Set("peer.gossip.election.startupGracePeriod", t)
   445  }
   446  
   447  // SetMembershipSampleInterval setups/initializes the frequency the
   448  // membership view should be checked
   449  func SetMembershipSampleInterval(t time.Duration) {
   450  	viper.Set("peer.gossip.election.membershipSampleInterval", t)
   451  }
   452  
   453  // SetLeaderAliveThreshold configures leader election alive threshold
   454  func SetLeaderAliveThreshold(t time.Duration) {
   455  	viper.Set("peer.gossip.election.leaderAliveThreshold", t)
   456  }
   457  
   458  // SetLeaderElectionDuration configures expected leadership election duration,
   459  // interval to wait until leader election will be completed
   460  func SetLeaderElectionDuration(t time.Duration) {
   461  	viper.Set("peer.gossip.election.leaderElectionDuration", t)
   462  }
   463  
   464  func getStartupGracePeriod() time.Duration {
   465  	return util.GetDurationOrDefault("peer.gossip.election.startupGracePeriod", time.Second*15)
   466  }
   467  
   468  func getMembershipSampleInterval() time.Duration {
   469  	return util.GetDurationOrDefault("peer.gossip.election.membershipSampleInterval", time.Second)
   470  }
   471  
   472  func getLeaderAliveThreshold() time.Duration {
   473  	return util.GetDurationOrDefault("peer.gossip.election.leaderAliveThreshold", time.Second*10)
   474  }
   475  
   476  func getLeadershipDeclarationInterval() time.Duration {
   477  	return time.Duration(getLeaderAliveThreshold() / 2)
   478  }
   479  
   480  func getLeaderElectionDuration() time.Duration {
   481  	return util.GetDurationOrDefault("peer.gossip.election.leaderElectionDuration", time.Second*5)
   482  }
   483  
   484  // GetMsgExpirationTimeout return leadership message expiration timeout
   485  func GetMsgExpirationTimeout() time.Duration {
   486  	return getLeaderAliveThreshold() * 10
   487  }