github.com/defanghe/fabric@v2.1.1+incompatible/gossip/election/election.go (about)

     1  /*
     2  Copyright IBM Corp. All Rights Reserved.
     3  
     4  SPDX-License-Identifier: Apache-2.0
     5  */
     6  
     7  package election
     8  
     9  import (
    10  	"bytes"
    11  	"encoding/hex"
    12  	"sync"
    13  	"sync/atomic"
    14  	"time"
    15  
    16  	"github.com/hyperledger/fabric/gossip/util"
    17  )
    18  
    19  // Gossip leader election module
    20  // Algorithm properties:
    21  // - Peers break symmetry by comparing IDs
    22  // - Each peer is either a leader or a follower,
    23  //   and the aim is to have exactly 1 leader if the membership view
    24  //   is the same for all peers
    25  // - If the network is partitioned into 2 or more sets, the number of leaders
    26  //   is the number of network partitions, but when the partition heals,
    27  //   only 1 leader should be left eventually
    28  // - Peers communicate by gossiping leadership proposal or declaration messages
    29  
    30  // The Algorithm, in pseudo code:
    31  //
    32  //
    33  // variables:
    34  // 	leaderKnown = false
    35  //
    36  // Invariant:
    37  //	Peer listens for messages from remote peers
    38  //	and whenever it receives a leadership declaration,
    39  //	leaderKnown is set to true
    40  //
    41  // Startup():
// 	wait for the membership view to stabilize, for a leadership declaration to be received,
//      or for the startup timeout to expire.
    44  //	goto SteadyState()
    45  //
    46  // SteadyState():
    47  // 	while true:
    48  //		If leaderKnown is false:
    49  // 			LeaderElection()
    50  //		If you are the leader:
    51  //			Broadcast leadership declaration
    52  //			If a leadership declaration was received from
    53  // 			a peer with a lower ID,
    54  //			become a follower
    55  //		Else, you're a follower:
    56  //			If haven't received a leadership declaration within
    57  // 			a time threshold:
    58  //				set leaderKnown to false
    59  //
    60  // LeaderElection():
    61  // 	Gossip leadership proposal message
    62  //	Collect messages from other peers sent within a time period
    63  //	If received a leadership declaration:
    64  //		return
    65  //	Iterate over all proposal messages collected.
    66  // 	If a proposal message from a peer with an ID lower
    67  // 	than yourself was received, return.
    68  //	Else, declare yourself a leader
    69  
// LeaderElectionAdapter is used by the leader election module
// to send and receive messages and to get membership information
type LeaderElectionAdapter interface {
	// Gossip gossips a message to other peers
	Gossip(Msg)

	// Accept returns a channel that emits messages
	Accept() <-chan Msg

	// CreateMessage creates a leadership message: a declaration
	// when isDeclaration is true, and a proposal otherwise
	CreateMessage(isDeclaration bool) Msg

	// Peers returns a list of peers considered alive
	Peers() []Peer

	// ReportMetrics sends a report to the metrics server about a leadership status
	ReportMetrics(isLeader bool)
}
    88  
// leadershipCallback is invoked with true when this peer becomes a leader
// and with false when it stops being one
type leadershipCallback func(isLeader bool)
    90  
// LeaderElectionService is the object that runs the leader election algorithm.
// Instances are created with NewLeaderElectionService.
type LeaderElectionService interface {
	// IsLeader returns whether this peer is a leader or not
	IsLeader() bool

	// Stop stops the LeaderElectionService
	Stop()

	// Yield relinquishes the leadership until a new leader is elected,
	// or a timeout expires
	Yield()
}
   103  
   104  type peerID []byte
   105  
   106  func (p peerID) String() string {
   107  	if p == nil {
   108  		return "<nil>"
   109  	}
   110  	return hex.EncodeToString(p)
   111  }
   112  
// Peer describes a remote peer
type Peer interface {
	// ID returns the ID of the peer; IDs are compared byte-wise
	// against message sender IDs to decide whether a sender is alive
	ID() peerID
}
   118  
// Msg describes a message sent from a remote peer
type Msg interface {
	// SenderID returns the ID of the peer that sent the message
	SenderID() peerID
	// IsProposal returns whether this message is a leadership proposal
	IsProposal() bool
	// IsDeclaration returns whether this message is a leadership declaration
	IsDeclaration() bool
}
   128  
   129  func noopCallback(_ bool) {
   130  }
   131  
// Default timing values of the election algorithm.
// NOTE(review): judging by the names, each constant is presumably the default
// for the equally named ElectionConfig field — confirm against the callers,
// since this file never reads the constants itself.
const (
	DefStartupGracePeriod       = time.Second * 15
	DefMembershipSampleInterval = time.Second
	DefLeaderAliveThreshold     = time.Second * 10
	DefLeaderElectionDuration   = time.Second * 5
)
   138  
// ElectionConfig holds the timing parameters of the election algorithm:
//   - StartupGracePeriod is the time limit for waiting at startup until the
//     membership view stabilizes
//   - MembershipSampleInterval is the pause between two samples of the
//     membership view while waiting for it to stabilize
//   - LeaderAliveThreshold is how long a follower waits before it forgets the
//     known leader; a leader re-declares itself every half of it, and a
//     yield expires after six times this value
//   - LeaderElectionDuration is how long proposals are collected during
//     a leader election
type ElectionConfig struct {
	StartupGracePeriod       time.Duration
	MembershipSampleInterval time.Duration
	LeaderAliveThreshold     time.Duration
	LeaderElectionDuration   time.Duration
}
   145  
   146  // NewLeaderElectionService returns a new LeaderElectionService
   147  func NewLeaderElectionService(adapter LeaderElectionAdapter, id string, callback leadershipCallback, config ElectionConfig) LeaderElectionService {
   148  	if len(id) == 0 {
   149  		panic("Empty id")
   150  	}
   151  	le := &leaderElectionSvcImpl{
   152  		id:            peerID(id),
   153  		proposals:     util.NewSet(),
   154  		adapter:       adapter,
   155  		stopChan:      make(chan struct{}),
   156  		interruptChan: make(chan struct{}, 1),
   157  		logger:        util.GetLogger(util.ElectionLogger, ""),
   158  		callback:      noopCallback,
   159  		config:        config,
   160  	}
   161  
   162  	if callback != nil {
   163  		le.callback = callback
   164  	}
   165  
   166  	go le.start()
   167  	return le
   168  }
   169  
   170  // leaderElectionSvcImpl is an implementation of a LeaderElectionService
   171  type leaderElectionSvcImpl struct {
   172  	id        peerID
   173  	proposals *util.Set
   174  	sync.Mutex
   175  	stopChan      chan struct{}
   176  	interruptChan chan struct{}
   177  	stopWG        sync.WaitGroup
   178  	isLeader      int32
   179  	leaderExists  int32
   180  	yield         int32
   181  	sleeping      bool
   182  	adapter       LeaderElectionAdapter
   183  	logger        util.Logger
   184  	callback      leadershipCallback
   185  	yieldTimer    *time.Timer
   186  	config        ElectionConfig
   187  }
   188  
   189  func (le *leaderElectionSvcImpl) start() {
   190  	le.stopWG.Add(2)
   191  	go le.handleMessages()
   192  	le.waitForMembershipStabilization(le.config.StartupGracePeriod)
   193  	go le.run()
   194  }
   195  
   196  func (le *leaderElectionSvcImpl) handleMessages() {
   197  	le.logger.Debug(le.id, ": Entering")
   198  	defer le.logger.Debug(le.id, ": Exiting")
   199  	defer le.stopWG.Done()
   200  	msgChan := le.adapter.Accept()
   201  	for {
   202  		select {
   203  		case <-le.stopChan:
   204  			return
   205  		case msg := <-msgChan:
   206  			if !le.isAlive(msg.SenderID()) {
   207  				le.logger.Debug(le.id, ": Got message from", msg.SenderID(), "but it is not in the view")
   208  				break
   209  			}
   210  			le.handleMessage(msg)
   211  		}
   212  	}
   213  }
   214  
   215  func (le *leaderElectionSvcImpl) handleMessage(msg Msg) {
   216  	msgType := "proposal"
   217  	if msg.IsDeclaration() {
   218  		msgType = "declaration"
   219  	}
   220  	le.logger.Debug(le.id, ":", msg.SenderID(), "sent us", msgType)
   221  	le.Lock()
   222  	defer le.Unlock()
   223  
   224  	if msg.IsProposal() {
   225  		le.proposals.Add(string(msg.SenderID()))
   226  	} else if msg.IsDeclaration() {
   227  		atomic.StoreInt32(&le.leaderExists, int32(1))
   228  		if le.sleeping && len(le.interruptChan) == 0 {
   229  			le.interruptChan <- struct{}{}
   230  		}
   231  		if bytes.Compare(msg.SenderID(), le.id) < 0 && le.IsLeader() {
   232  			le.stopBeingLeader()
   233  		}
   234  	} else {
   235  		// We shouldn't get here
   236  		le.logger.Error("Got a message that's not a proposal and not a declaration")
   237  	}
   238  }
   239  
   240  // waitForInterrupt sleeps until the interrupt channel is triggered
   241  // or given timeout expires
   242  func (le *leaderElectionSvcImpl) waitForInterrupt(timeout time.Duration) {
   243  	le.logger.Debug(le.id, ": Entering")
   244  	defer le.logger.Debug(le.id, ": Exiting")
   245  	le.Lock()
   246  	le.sleeping = true
   247  	le.Unlock()
   248  
   249  	select {
   250  	case <-le.interruptChan:
   251  	case <-le.stopChan:
   252  	case <-time.After(timeout):
   253  	}
   254  
   255  	le.Lock()
   256  	le.sleeping = false
   257  	// We drain the interrupt channel
   258  	// because we might get 2 leadership declarations messages
   259  	// while sleeping, but we would only read 1 of them in the select block above
   260  	le.drainInterruptChannel()
   261  	le.Unlock()
   262  }
   263  
   264  func (le *leaderElectionSvcImpl) run() {
   265  	defer le.stopWG.Done()
   266  	for !le.shouldStop() {
   267  		if !le.isLeaderExists() {
   268  			le.leaderElection()
   269  		}
   270  		// If we are yielding and some leader has been elected,
   271  		// stop yielding
   272  		if le.isLeaderExists() && le.isYielding() {
   273  			le.stopYielding()
   274  		}
   275  		if le.shouldStop() {
   276  			return
   277  		}
   278  		if le.IsLeader() {
   279  			le.leader()
   280  		} else {
   281  			le.follower()
   282  		}
   283  	}
   284  }
   285  
   286  func (le *leaderElectionSvcImpl) leaderElection() {
   287  	le.logger.Debug(le.id, ": Entering")
   288  	defer le.logger.Debug(le.id, ": Exiting")
   289  	// If we're yielding to other peers, do not participate
   290  	// in leader election
   291  	if le.isYielding() {
   292  		return
   293  	}
   294  	// Propose ourselves as a leader
   295  	le.propose()
   296  	// Collect other proposals
   297  	le.waitForInterrupt(le.config.LeaderElectionDuration)
   298  	// If someone declared itself as a leader, give up
   299  	// on trying to become a leader too
   300  	if le.isLeaderExists() {
   301  		le.logger.Info(le.id, ": Some peer is already a leader")
   302  		return
   303  	}
   304  
   305  	if le.isYielding() {
   306  		le.logger.Debug(le.id, ": Aborting leader election because yielding")
   307  		return
   308  	}
   309  	// Leader doesn't exist, let's see if there is a better candidate than us
   310  	// for being a leader
   311  	for _, o := range le.proposals.ToArray() {
   312  		id := o.(string)
   313  		if bytes.Compare(peerID(id), le.id) < 0 {
   314  			return
   315  		}
   316  	}
   317  	// If we got here, there is no one that proposed being a leader
   318  	// that's a better candidate than us.
   319  	le.beLeader()
   320  	atomic.StoreInt32(&le.leaderExists, int32(1))
   321  }
   322  
   323  // propose sends a leadership proposal message to remote peers
   324  func (le *leaderElectionSvcImpl) propose() {
   325  	le.logger.Debug(le.id, ": Entering")
   326  	le.logger.Debug(le.id, ": Exiting")
   327  	leadershipProposal := le.adapter.CreateMessage(false)
   328  	le.adapter.Gossip(leadershipProposal)
   329  }
   330  
   331  func (le *leaderElectionSvcImpl) follower() {
   332  	le.logger.Debug(le.id, ": Entering")
   333  	defer le.logger.Debug(le.id, ": Exiting")
   334  
   335  	le.proposals.Clear()
   336  	atomic.StoreInt32(&le.leaderExists, int32(0))
   337  	le.adapter.ReportMetrics(false)
   338  	select {
   339  	case <-time.After(le.config.LeaderAliveThreshold):
   340  	case <-le.stopChan:
   341  	}
   342  }
   343  
   344  func (le *leaderElectionSvcImpl) leader() {
   345  	leaderDeclaration := le.adapter.CreateMessage(true)
   346  	le.adapter.Gossip(leaderDeclaration)
   347  	le.adapter.ReportMetrics(true)
   348  	le.waitForInterrupt(le.config.LeaderAliveThreshold / 2)
   349  }
   350  
   351  // waitForMembershipStabilization waits for membership view to stabilize
   352  // or until a time limit expires, or until a peer declares itself as a leader
   353  func (le *leaderElectionSvcImpl) waitForMembershipStabilization(timeLimit time.Duration) {
   354  	le.logger.Debug(le.id, ": Entering")
   355  	defer le.logger.Debug(le.id, ": Exiting, peers found", len(le.adapter.Peers()))
   356  	endTime := time.Now().Add(timeLimit)
   357  	viewSize := len(le.adapter.Peers())
   358  	for !le.shouldStop() {
   359  		time.Sleep(le.config.MembershipSampleInterval)
   360  		newSize := len(le.adapter.Peers())
   361  		if newSize == viewSize || time.Now().After(endTime) || le.isLeaderExists() {
   362  			return
   363  		}
   364  		viewSize = newSize
   365  	}
   366  }
   367  
   368  // drainInterruptChannel clears the interruptChannel
   369  // if needed
   370  func (le *leaderElectionSvcImpl) drainInterruptChannel() {
   371  	if len(le.interruptChan) == 1 {
   372  		<-le.interruptChan
   373  	}
   374  }
   375  
   376  // isAlive returns whether peer of given id is considered alive
   377  func (le *leaderElectionSvcImpl) isAlive(id peerID) bool {
   378  	for _, p := range le.adapter.Peers() {
   379  		if bytes.Equal(p.ID(), id) {
   380  			return true
   381  		}
   382  	}
   383  	return false
   384  }
   385  
   386  func (le *leaderElectionSvcImpl) isLeaderExists() bool {
   387  	return atomic.LoadInt32(&le.leaderExists) == int32(1)
   388  }
   389  
   390  // IsLeader returns whether this peer is a leader
   391  func (le *leaderElectionSvcImpl) IsLeader() bool {
   392  	isLeader := atomic.LoadInt32(&le.isLeader) == int32(1)
   393  	le.logger.Debug(le.id, ": Returning", isLeader)
   394  	return isLeader
   395  }
   396  
   397  func (le *leaderElectionSvcImpl) beLeader() {
   398  	le.logger.Info(le.id, ": Becoming a leader")
   399  	atomic.StoreInt32(&le.isLeader, int32(1))
   400  	le.callback(true)
   401  }
   402  
   403  func (le *leaderElectionSvcImpl) stopBeingLeader() {
   404  	le.logger.Info(le.id, "Stopped being a leader")
   405  	atomic.StoreInt32(&le.isLeader, int32(0))
   406  	le.callback(false)
   407  }
   408  
   409  func (le *leaderElectionSvcImpl) shouldStop() bool {
   410  	select {
   411  	case <-le.stopChan:
   412  		return true
   413  	default:
   414  		return false
   415  	}
   416  }
   417  
   418  func (le *leaderElectionSvcImpl) isYielding() bool {
   419  	return atomic.LoadInt32(&le.yield) == int32(1)
   420  }
   421  
   422  func (le *leaderElectionSvcImpl) stopYielding() {
   423  	le.logger.Debug("Stopped yielding")
   424  	le.Lock()
   425  	defer le.Unlock()
   426  	atomic.StoreInt32(&le.yield, int32(0))
   427  	le.yieldTimer.Stop()
   428  }
   429  
// Yield relinquishes the leadership until a new leader is elected,
// or a timeout expires.
// It does nothing when this peer is not the leader or is already yielding.
// The whole operation runs under the service lock, including the leadership
// callback that stopBeingLeader invokes with false.
func (le *leaderElectionSvcImpl) Yield() {
	le.Lock()
	defer le.Unlock()
	if !le.IsLeader() || le.isYielding() {
		return
	}
	// Turn on the yield flag
	atomic.StoreInt32(&le.yield, int32(1))
	// Stop being a leader
	le.stopBeingLeader()
	// Clear the leader exists flag since it could be that we are the leader
	atomic.StoreInt32(&le.leaderExists, int32(0))
	// Clear the yield flag in any case afterwards: the timer fires after
	// 6 times LeaderAliveThreshold unless stopYielding cancels it first
	le.yieldTimer = time.AfterFunc(le.config.LeaderAliveThreshold*6, func() {
		atomic.StoreInt32(&le.yield, int32(0))
	})
}
   449  
   450  // Stop stops the LeaderElectionService
   451  func (le *leaderElectionSvcImpl) Stop() {
   452  	select {
   453  	case <-le.stopChan:
   454  	default:
   455  		close(le.stopChan)
   456  		le.logger.Debug(le.id, ": Entering")
   457  		defer le.logger.Debug(le.id, ": Exiting")
   458  		le.stopWG.Wait()
   459  	}
   460  }