
     1  package etcdutil
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"fmt"
     7  	"os"
     8  	"path"
     9  	"sync/atomic"
    10  	"time"
    12  	""
    13  	""
    14  	""
    15  	""
    16  	etcd ""
    17  )
// LeaderElector is the minimal interface a leader election exposes to its
// consumers. Election is the etcd backed implementation and AlwaysLeaderMock
// is a stand-in that always reports leadership.
type LeaderElector interface {
	// IsLeader returns true if our candidate is currently the leader.
	IsLeader() bool
	// Concede gives up leadership. It returns true if we were leader at
	// the time of the call and false otherwise.
	Concede() (bool, error)
	// Close cancels our participation in the election.
	Close()
}
    25  var _ LeaderElector = &Election{}
// ElectionEvent describes a change in the state of an election. It is
// delivered to the EventObserver provided in ElectionConfig.
type ElectionEvent struct {
	// True if our candidate is leader
	IsLeader bool
	// True if the election is shutdown and
	// no further events will follow.
	IsDone bool
	// Holds the etcd key of the current leader
	LeaderKey string
	// Holds the data stored under the current leader's key
	LeaderData string
	// If not nil, contains an error encountered
	// while participating in the election.
	Err error
}
// Event is an alias of ElectionEvent.
//
// Deprecated: use ElectionEvent instead.
type Event = ElectionEvent
// EventObserver is a callback that receives election events: leadership
// changes, errors and the final shutdown notification.
type EventObserver func(ElectionEvent)
// Election is a single candidate's participation in an etcd backed leader
// election. Create one with NewElection or NewElectionAsync.
type Election struct {
	// Optional callback that receives election events (from ElectionConfig.EventObserver)
	observer  EventObserver
	// The etcd key prefix of the election ("/elections/<name>")
	election  string
	// The value written under our campaign key
	candidate string
	// Provides the delay between retries after a campaign error
	backOff   *backOffCounter
	// Cancel func paired with ctx.
	// NOTE(review): nothing in this file appears to call it - confirm
	cancel    context.CancelFunc
	// Runs the campaign and watch loops
	wg        syncutil.WaitGroup
	// Context used for the etcd requests made while campaigning
	ctx       context.Context
	// Session TTL, also used as the timeout when withdrawing a campaign
	ttl       time.Duration
	client    *etcd.Client
	// Reports lease changes to onSessionChange
	session   *Session
	// Our campaign key: the election prefix followed by the lease ID in hex
	key       string
	// 1 if we are leader, 0 otherwise; accessed through sync/atomic
	isLeader  int32
	// True while a campaign is active; a plain bool that is not accessed atomically
	isRunning bool
}
// ElectionConfig holds the settings for NewElection and NewElectionAsync.
// Defaults mentioned below are applied via setter.SetDefault in NewElectionAsync.
type ElectionConfig struct {
	// Optional function when provided is called every time leadership changes or an error occurs
	EventObserver EventObserver
	// The name of the election (IE: scout, blackbird, etc...). Defaults to "null".
	// The name is joined onto the "/elections" prefix to form the etcd key prefix.
	Election string
	// The name of this instance (IE: worker-n01, worker-n02, etc...). Defaults to
	// the hostname when os.Hostname() succeeds.
	Candidate string
	// Seconds to wait before giving up the election if leader disconnected. Defaults to 5.
	TTL int64
}
    74  // NewElection creates a new leader election and submits our candidate for leader.
    75  //
    76  //	 client, _ := etcdutil.NewClient(nil)
    77  //
    78  //	 // Start a leader election and attempt to become leader, only returns after
    79  //	 // determining the current leader.
    80  //	 election := etcdutil.NewElection(client, etcdutil.ElectionConfig{
    81  //	     Election: "presidental",
    82  //	     Candidate: "donald",
    83  //			EventObserver: func(e etcdutil.ElectionEvent) {
    84  //			  	fmt.Printf("Leader Data: %t\n", e.LeaderData)
    85  //				if e.IsLeader {
    86  //					// Do thing as leader
    87  //				}
    88  //			},
    89  //	     TTL: 5,
    90  //	 })
    91  //
    92  //		// Returns true if we are leader (thread safe)
    93  //		if election.IsLeader() {
    94  //			// Do periodic thing
    95  //		}
    96  //
    97  //	 // Concede the election if leader and cancel our candidacy
    98  //	 // for the election.
    99  //	 election.Stop()
   100  func NewElection(ctx context.Context, client *etcd.Client, conf ElectionConfig) (*Election, error) {
   101  	var initialElectionErr error
   102  	readyCh := make(chan struct{})
   103  	initialElection := true
   104  	userObserver := conf.EventObserver
   105  	// Wrap user's observer to intercept the initial election.
   106  	conf.EventObserver = func(event ElectionEvent) {
   107  		if userObserver != nil {
   108  			userObserver(event)
   109  		}
   110  		if initialElection {
   111  			initialElection = false
   112  			initialElectionErr = event.Err
   113  			close(readyCh)
   114  			return
   115  		}
   116  	}
   117  	e := NewElectionAsync(client, conf)
   118  	// Wait for results of the initial leader election.
   119  	select {
   120  	case <-readyCh:
   121  	case <-ctx.Done():
   122  		return nil, ctx.Err()
   123  	}
   124  	return e, errors.WithStack(initialElectionErr)
   125  }
   127  // NewElectionAsync creates a new leader election and submits our candidate for
   128  // leader. It does not wait for the election to complete. The caller must
   129  // provide an election event observer to monitor the election outcome.
   130  //
   131  //	 client, _ := etcdutil.NewClient(nil)
   132  //
   133  //	 // Start a leader election and returns immediately.
   134  //	 election := etcdutil.NewElectionAsync(client, etcdutil.ElectionConfig{
   135  //	     Election: "presidental",
   136  //	     Candidate: "donald",
   137  //			EventObserver: func(e etcdutil.Event) {
   138  //			  	fmt.Printf("Leader Data: %t\n", e.LeaderData)
   139  //				if e.IsLeader {
   140  //					// Do thing as leader
   141  //				}
   142  //			},
   143  //	     TTL: 5,
   144  //	 })
   145  //
   146  //	 // Cancels the election and concedes the election if we are leader.
   147  //	 election.Stop()
   148  func NewElectionAsync(client *etcd.Client, conf ElectionConfig) *Election {
   149  	setter.SetDefault(&conf.Election, "null")
   150  	conf.Election = path.Join("/elections", conf.Election)
   151  	if host, err := os.Hostname(); err == nil {
   152  		setter.SetDefault(&conf.Candidate, host)
   153  	}
   154  	setter.SetDefault(&conf.TTL, int64(5))
   156  	ttlDuration := time.Duration(conf.TTL) * time.Second
   157  	e := Election{
   158  		observer:  conf.EventObserver,
   159  		election:  conf.Election,
   160  		candidate: conf.Candidate,
   161  		ttl:       ttlDuration,
   162  		backOff:   newBackOffCounter(500*time.Millisecond, ttlDuration, 2),
   163  		client:    client,
   164  	}
   165  	e.ctx, e.cancel = context.WithCancel(context.Background())
   166  	e.session = &Session{
   167  		observer: e.onSessionChange,
   168  		ttl:      e.ttl,
   169  		backOff:  newBackOffCounter(500*time.Millisecond, ttlDuration, 2),
   170  		client:   client,
   171  	}
   172  	e.session.start()
   173  	return &e
   174  }
// onSessionChange is the observer registered with the session. It is handed
// the current lease ID (NoLease when the lease is gone) and an optional error.
// Losing the lease stops the running campaign; gaining a lease while no
// campaign is running starts one through e.wg.Until.
//
// NOTE(review): isRunning is a plain bool that is written both here and inside
// the Until callback - confirm these never run concurrently.
func (e *Election) onSessionChange(leaseID etcd.LeaseID, err error) {
	// log.Debugf("SessionChange: Lease ID: %v running: %t err: %v", leaseID, e.isRunning, err)
	// If we lost our lease, concede the campaign and stop
	if leaseID == NoLease {
		// Avoid stopping twice
		if !e.isRunning {
			return
		}
		e.wg.Stop()
		e.isRunning = false
		atomic.StoreInt32(&e.isLeader, 0)
		if err != nil {
			e.onErr(err, "lease error")
		}
		return
	}
	// We have a lease and a campaign is already running, nothing to do
	if e.isRunning {
		return
	}
	e.isRunning = true
	// Presumably Until re-invokes the callback for as long as it returns
	// true - verify against syncutil.WaitGroup.
	e.wg.Until(func(done chan struct{}) bool {
		var err error
		var rev int64
		// Create our campaign key under the election prefix
		rev, err = e.registerCampaign(leaseID)
		if err != nil {
			e.onErr(err, "during campaign registration")
			// Retry after a back off unless we are asked to stop
			select {
			case <-time.After(e.backOff.Next()):
				return true
			case <-done:
				e.isRunning = false
				return false
			}
		}
		if err := e.watchCampaign(rev); err != nil {
			e.onErr(err, "during campaign watch")
			select {
			case <-time.After(e.backOff.Next()):
				return true
			case <-done:
			}
			// Only reached when done was signalled during the back off.
			// If delete takes longer than our TTL then lease is expired
			// and we are no longer leader anyway.
			ctx, cancel := context.WithTimeout(context.Background(), e.ttl)
			// Withdraw our candidacy since an error occurred
			if err := e.withDrawCampaign(ctx); err != nil {
				e.onErr(err, "")
			}
			cancel()
			// NOTE(review): this asks for another iteration even though done
			// was signalled, unlike the registration branch above which
			// returns false - confirm this is intended.
			return true
		}
		// The watch started successfully; the campaign now continues in
		// the loop started by watchCampaign.
		e.backOff.Reset()
		return false
	})
}
   239  func (e *Election) withDrawCampaign(ctx context.Context) error {
   240  	defer func() {
   241  		atomic.StoreInt32(&e.isLeader, 0)
   242  	}()
   244  	_, err := e.client.Delete(ctx, e.key)
   245  	if err != nil {
   246  		return errors.Wrapf(err, "while withdrawing campaign '%s'", e.key)
   247  	}
   248  	return nil
   249  }
   251  func (e *Election) registerCampaign(id etcd.LeaseID) (revision int64, err error) {
   252  	// Create an entry under the election prefix with our lease ID as the key name
   253  	e.key = fmt.Sprintf("%s%x", e.election, id)
   254  	txn := e.client.Txn(e.ctx).If(etcd.Compare(etcd.CreateRevision(e.key), "=", 0))
   255  	txn = txn.Then(etcd.OpPut(e.key, e.candidate, etcd.WithLease(id)))
   256  	txn = txn.Else(etcd.OpGet(e.key))
   257  	resp, err := txn.Commit()
   258  	if err != nil {
   259  		return 0, err
   260  	}
   261  	revision = resp.Header.Revision
   263  	// This shouldn't happen, our session should always tell us if we disconnected and
   264  	// etcd should have provided us with a unique lease id. If it does happen then
   265  	// we should write our candidate name as the value and assume ownership
   266  	if !resp.Succeeded {
   267  		kv := resp.Responses[0].GetResponseRange().Kvs[0]
   268  		revision = kv.CreateRevision
   269  		if string(kv.Value) != e.candidate {
   270  			if _, err = e.client.Put(e.ctx, e.key, e.candidate); err != nil {
   271  				return 0, err
   272  			}
   273  		}
   274  	}
   275  	return revision, nil
   276  }
   278  // getLeader returns a KV pair for the current leader
   279  func (e *Election) getLeader(ctx context.Context) (*mvccpb.KeyValue, error) {
   280  	// The leader is the first entry under the election prefix
   281  	resp, err := e.client.Get(ctx, e.election, etcd.WithFirstCreate()...)
   282  	if err != nil {
   283  		return nil, err
   284  	}
   285  	if len(resp.Kvs) == 0 {
   286  		return nil, nil
   287  	}
   288  	return resp.Kvs[0], nil
   289  }
   291  // watchCampaign monitors the status of the campaign and notifying any
   292  // changes in leadership to the observer.
   293  func (e *Election) watchCampaign(rev int64) error {
   294  	var watchChan etcd.WatchChan
   295  	ready := make(chan struct{})
   297  	// Get the current leader of this election
   298  	leaderKV, err := e.getLeader(e.ctx)
   299  	if err != nil {
   300  		return errors.Wrap(err, "while querying for current leader")
   301  	}
   302  	if leaderKV == nil {
   303  		return errors.Wrap(err, "found no leader when watch began")
   304  	}
   306  	watcher := etcd.NewWatcher(e.client)
   308  	// We do this because watcher does not reliably return when errors occur on connect
   309  	// or when cancelled (See
   310  	go func() {
   311  		watchChan = watcher.Watch(etcd.WithRequireLeader(e.ctx), e.election,
   312  			etcd.WithRev(int64(rev+1)), etcd.WithPrefix())
   313  		close(ready)
   314  	}()
   316  	select {
   317  	case <-ready:
   318  	case <-e.ctx.Done():
   319  		return errors.Wrap(e.ctx.Err(), "while waiting for etcd watch to start")
   320  	}
   322  	// Notify the observers of the current leader
   323  	e.onLeaderChange(leaderKV)
   325  	e.wg.Until(func(done chan struct{}) bool {
   326  		select {
   327  		case resp := <-watchChan:
   328  			if resp.Canceled {
   329  				e.onFatalErr(errors.New("remote server cancelled watch"), "during campaign watch")
   330  				return false
   331  			}
   332  			if err := resp.Err(); err != nil {
   333  				e.onFatalErr(err, "during campaign watch, remote server returned err")
   334  				return false
   335  			}
   337  			// Watch for changes in leadership
   338  			for _, event := range resp.Events {
   339  				if event.Type == etcd.EventTypeDelete || event.Type == etcd.EventTypePut {
   340  					// If the key is for our current leader
   341  					if bytes.Equal(event.Kv.Key, leaderKV.Key) {
   342  						// Check our leadership status
   343  						resp, err := e.getLeader(e.ctx)
   344  						if err != nil {
   345  							e.onFatalErr(err, "while querying for new leader")
   346  							return false
   347  						}
   349  						// If we have no leader
   350  						if resp == nil {
   351  							e.onFatalErr(err, "After etcd event no leader was found, restarting election")
   352  							return false
   353  						}
   354  						// Notify if leadership has changed
   355  						if !bytes.Equal(resp.Key, leaderKV.Key) {
   356  							leaderKV = resp
   357  							e.onLeaderChange(leaderKV)
   358  						}
   359  					}
   360  				}
   361  			}
   362  		case <-done:
   363  			_ = watcher.Close()
   364  			// If withdraw takes longer than our TTL then lease is expired
   365  			// and we are no longer leader anyway.
   366  			ctx, cancel := context.WithTimeout(context.Background(), e.ttl)
   368  			// Withdraw our candidacy because of shutdown
   369  			if err := e.withDrawCampaign(ctx); err != nil {
   370  				e.onErr(err, "")
   371  			}
   372  			e.onLeaderChange(&mvccpb.KeyValue{})
   373  			cancel()
   374  			return false
   375  		}
   376  		return true
   377  	})
   378  	return nil
   379  }
   381  func (e *Election) onLeaderChange(kv *mvccpb.KeyValue) {
   382  	event := ElectionEvent{}
   383  	if kv != nil {
   384  		if string(kv.Key) == e.key {
   385  			atomic.StoreInt32(&e.isLeader, 1)
   386  			event.IsLeader = true
   387  		} else {
   388  			atomic.StoreInt32(&e.isLeader, 0)
   389  		}
   390  		event.LeaderKey = string(kv.Key)
   391  		event.LeaderData = string(kv.Value)
   392  	} else {
   393  		event.IsDone = true
   394  	}
   395  	if != nil {
   397  	}
   398  }
   400  // onErr reports errors the the observer
   401  func (e *Election) onErr(err error, msg string) {
   402  	atomic.StoreInt32(&e.isLeader, 0)
   403  	if msg != "" {
   404  		err = errors.Wrap(err, msg)
   405  	}
   406  	if != nil {
   407{Err: err})
   408  	}
   409  }
   411  // onFatalErr reports errors to the observer and resets the election and session
   412  func (e *Election) onFatalErr(err error, msg string) {
   413  	e.onErr(err, msg)
   414  	// We call this in a go routine to avoid blocking on `Stop()` calls
   415  	go e.session.Reset()
   416  }
   418  // Close cancels the election and concedes the election if we are leader
   419  func (e *Election) Close() {
   420  	e.session.Close()
   421  	e.wg.Wait()
   422  	// Emit the `Done:true` event
   423  	e.onLeaderChange(nil)
   424  }
   426  // IsLeader returns true if we are leader. It only makes sense if the election
   427  // was created with NewElection that block until the initial election is over.
   428  func (e *Election) IsLeader() bool {
   429  	return atomic.LoadInt32(&e.isLeader) == 1
   430  }
   432  // Concede concedes leadership if we are leader and restarts the campaign returns true.
   433  // if we are not leader do nothing and return false. If you want to concede leadership
   434  // and cancel the campaign call Close() instead.
   435  func (e *Election) Concede() (bool, error) {
   436  	isLeader := atomic.LoadInt32(&e.isLeader)
   437  	if isLeader == 0 {
   438  		return false, nil
   439  	}
   440  	oldCampaignKey := e.key
   441  	e.session.Reset()
   443  	// Ensure there are no lingering candidates
   444  	ctx, cancel := context.WithTimeout(context.Background(), e.ttl)
   445  	cancel()
   447  	_, err := e.client.Delete(ctx, oldCampaignKey)
   448  	if err != nil {
   449  		return true, errors.Wrapf(err, "while cleaning up campaign '%s'", oldCampaignKey)
   450  	}
   452  	return true, nil
   453  }
   455  type AlwaysLeaderMock struct{}
   457  func (s *AlwaysLeaderMock) IsLeader() bool         { return true }
   458  func (s *AlwaysLeaderMock) Concede() (bool, error) { return true, nil }
   459  func (s *AlwaysLeaderMock) Close()                 {}