github.com/mailgun/holster/v4@v4.20.0/etcdutil/election.go (about) 1 package etcdutil 2 3 import ( 4 "bytes" 5 "context" 6 "fmt" 7 "os" 8 "path" 9 "sync/atomic" 10 "time" 11 12 "github.com/mailgun/holster/v4/errors" 13 "github.com/mailgun/holster/v4/setter" 14 "github.com/mailgun/holster/v4/syncutil" 15 "go.etcd.io/etcd/api/v3/mvccpb" 16 etcd "go.etcd.io/etcd/client/v3" 17 ) 18 19 type LeaderElector interface { 20 IsLeader() bool 21 Concede() (bool, error) 22 Close() 23 } 24 25 var _ LeaderElector = &Election{} 26 27 type ElectionEvent struct { 28 // True if our candidate is leader 29 IsLeader bool 30 // True if the election is shutdown and 31 // no further events will follow. 32 IsDone bool 33 // Holds the current leader key 34 LeaderKey string 35 // Hold the current leaders data 36 LeaderData string 37 // If not nil, contains an error encountered 38 // while participating in the election. 39 Err error 40 } 41 42 // Deprecated: use ElectionEvent instead 43 type Event = ElectionEvent 44 45 type EventObserver func(ElectionEvent) 46 47 type Election struct { 48 observer EventObserver 49 election string 50 candidate string 51 backOff *backOffCounter 52 cancel context.CancelFunc 53 wg syncutil.WaitGroup 54 ctx context.Context 55 ttl time.Duration 56 client *etcd.Client 57 session *Session 58 key string 59 isLeader int32 60 isRunning bool 61 } 62 63 type ElectionConfig struct { 64 // Optional function when provided is called every time leadership changes or an error occurs 65 EventObserver EventObserver 66 // The name of the election (IE: scout, blackbird, etc...) 67 Election string 68 // The name of this instance (IE: worker-n01, worker-n02, etc...) 69 Candidate string 70 // Seconds to wait before giving up the election if leader disconnected 71 TTL int64 72 } 73 74 // NewElection creates a new leader election and submits our candidate for leader. 75 // 76 // client, _ := etcdutil.NewClient(nil) 77 // 78 // // Start a leader election and attempt to become leader, only returns after 79 // // determining the current leader. 80 // election := etcdutil.NewElection(client, etcdutil.ElectionConfig{ 81 // Election: "presidental", 82 // Candidate: "donald", 83 // EventObserver: func(e etcdutil.ElectionEvent) { 84 // fmt.Printf("Leader Data: %t\n", e.LeaderData) 85 // if e.IsLeader { 86 // // Do thing as leader 87 // } 88 // }, 89 // TTL: 5, 90 // }) 91 // 92 // // Returns true if we are leader (thread safe) 93 // if election.IsLeader() { 94 // // Do periodic thing 95 // } 96 // 97 // // Concede the election if leader and cancel our candidacy 98 // // for the election. 99 // election.Stop() 100 func NewElection(ctx context.Context, client *etcd.Client, conf ElectionConfig) (*Election, error) { 101 var initialElectionErr error 102 readyCh := make(chan struct{}) 103 initialElection := true 104 userObserver := conf.EventObserver 105 // Wrap user's observer to intercept the initial election. 106 conf.EventObserver = func(event ElectionEvent) { 107 if userObserver != nil { 108 userObserver(event) 109 } 110 if initialElection { 111 initialElection = false 112 initialElectionErr = event.Err 113 close(readyCh) 114 return 115 } 116 } 117 e := NewElectionAsync(client, conf) 118 // Wait for results of the initial leader election. 119 select { 120 case <-readyCh: 121 case <-ctx.Done(): 122 return nil, ctx.Err() 123 } 124 return e, errors.WithStack(initialElectionErr) 125 } 126 127 // NewElectionAsync creates a new leader election and submits our candidate for 128 // leader. It does not wait for the election to complete. The caller must 129 // provide an election event observer to monitor the election outcome. 130 // 131 // client, _ := etcdutil.NewClient(nil) 132 // 133 // // Start a leader election and returns immediately. 134 // election := etcdutil.NewElectionAsync(client, etcdutil.ElectionConfig{ 135 // Election: "presidental", 136 // Candidate: "donald", 137 // EventObserver: func(e etcdutil.Event) { 138 // fmt.Printf("Leader Data: %t\n", e.LeaderData) 139 // if e.IsLeader { 140 // // Do thing as leader 141 // } 142 // }, 143 // TTL: 5, 144 // }) 145 // 146 // // Cancels the election and concedes the election if we are leader. 147 // election.Stop() 148 func NewElectionAsync(client *etcd.Client, conf ElectionConfig) *Election { 149 setter.SetDefault(&conf.Election, "null") 150 conf.Election = path.Join("/elections", conf.Election) 151 if host, err := os.Hostname(); err == nil { 152 setter.SetDefault(&conf.Candidate, host) 153 } 154 setter.SetDefault(&conf.TTL, int64(5)) 155 156 ttlDuration := time.Duration(conf.TTL) * time.Second 157 e := Election{ 158 observer: conf.EventObserver, 159 election: conf.Election, 160 candidate: conf.Candidate, 161 ttl: ttlDuration, 162 backOff: newBackOffCounter(500*time.Millisecond, ttlDuration, 2), 163 client: client, 164 } 165 e.ctx, e.cancel = context.WithCancel(context.Background()) 166 e.session = &Session{ 167 observer: e.onSessionChange, 168 ttl: e.ttl, 169 backOff: newBackOffCounter(500*time.Millisecond, ttlDuration, 2), 170 client: client, 171 } 172 e.session.start() 173 return &e 174 } 175 176 func (e *Election) onSessionChange(leaseID etcd.LeaseID, err error) { 177 // log.Debugf("SessionChange: Lease ID: %v running: %t err: %v", leaseID, e.isRunning, err) 178 179 // If we lost our lease, concede the campaign and stop 180 if leaseID == NoLease { 181 // Avoid stopping twice 182 if !e.isRunning { 183 return 184 } 185 e.wg.Stop() 186 e.isRunning = false 187 atomic.StoreInt32(&e.isLeader, 0) 188 if err != nil { 189 e.onErr(err, "lease error") 190 } 191 return 192 } 193 194 if e.isRunning { 195 return 196 } 197 198 e.isRunning = true 199 200 e.wg.Until(func(done chan struct{}) bool { 201 var err error 202 var rev int64 203 204 rev, err = e.registerCampaign(leaseID) 205 if err != nil { 206 e.onErr(err, "during campaign registration") 207 select { 208 case <-time.After(e.backOff.Next()): 209 return true 210 case <-done: 211 e.isRunning = false 212 return false 213 } 214 } 215 216 if err := e.watchCampaign(rev); err != nil { 217 e.onErr(err, "during campaign watch") 218 select { 219 case <-time.After(e.backOff.Next()): 220 return true 221 case <-done: 222 } 223 224 // If delete takes longer than our TTL then lease is expired 225 // and we are no longer leader anyway. 226 ctx, cancel := context.WithTimeout(context.Background(), e.ttl) 227 // Withdraw our candidacy since an error occurred 228 if err := e.withDrawCampaign(ctx); err != nil { 229 e.onErr(err, "") 230 } 231 cancel() 232 return true 233 } 234 e.backOff.Reset() 235 return false 236 }) 237 } 238 239 func (e *Election) withDrawCampaign(ctx context.Context) error { 240 defer func() { 241 atomic.StoreInt32(&e.isLeader, 0) 242 }() 243 244 _, err := e.client.Delete(ctx, e.key) 245 if err != nil { 246 return errors.Wrapf(err, "while withdrawing campaign '%s'", e.key) 247 } 248 return nil 249 } 250 251 func (e *Election) registerCampaign(id etcd.LeaseID) (revision int64, err error) { 252 // Create an entry under the election prefix with our lease ID as the key name 253 e.key = fmt.Sprintf("%s%x", e.election, id) 254 txn := e.client.Txn(e.ctx).If(etcd.Compare(etcd.CreateRevision(e.key), "=", 0)) 255 txn = txn.Then(etcd.OpPut(e.key, e.candidate, etcd.WithLease(id))) 256 txn = txn.Else(etcd.OpGet(e.key)) 257 resp, err := txn.Commit() 258 if err != nil { 259 return 0, err 260 } 261 revision = resp.Header.Revision 262 263 // This shouldn't happen, our session should always tell us if we disconnected and 264 // etcd should have provided us with a unique lease id. If it does happen then 265 // we should write our candidate name as the value and assume ownership 266 if !resp.Succeeded { 267 kv := resp.Responses[0].GetResponseRange().Kvs[0] 268 revision = kv.CreateRevision 269 if string(kv.Value) != e.candidate { 270 if _, err = e.client.Put(e.ctx, e.key, e.candidate); err != nil { 271 return 0, err 272 } 273 } 274 } 275 return revision, nil 276 } 277 278 // getLeader returns a KV pair for the current leader 279 func (e *Election) getLeader(ctx context.Context) (*mvccpb.KeyValue, error) { 280 // The leader is the first entry under the election prefix 281 resp, err := e.client.Get(ctx, e.election, etcd.WithFirstCreate()...) 282 if err != nil { 283 return nil, err 284 } 285 if len(resp.Kvs) == 0 { 286 return nil, nil 287 } 288 return resp.Kvs[0], nil 289 } 290 291 // watchCampaign monitors the status of the campaign and notifying any 292 // changes in leadership to the observer. 293 func (e *Election) watchCampaign(rev int64) error { 294 var watchChan etcd.WatchChan 295 ready := make(chan struct{}) 296 297 // Get the current leader of this election 298 leaderKV, err := e.getLeader(e.ctx) 299 if err != nil { 300 return errors.Wrap(err, "while querying for current leader") 301 } 302 if leaderKV == nil { 303 return errors.Wrap(err, "found no leader when watch began") 304 } 305 306 watcher := etcd.NewWatcher(e.client) 307 308 // We do this because watcher does not reliably return when errors occur on connect 309 // or when cancelled (See https://github.com/etcd-io/etcd/pull/10020) 310 go func() { 311 watchChan = watcher.Watch(etcd.WithRequireLeader(e.ctx), e.election, 312 etcd.WithRev(int64(rev+1)), etcd.WithPrefix()) 313 close(ready) 314 }() 315 316 select { 317 case <-ready: 318 case <-e.ctx.Done(): 319 return errors.Wrap(e.ctx.Err(), "while waiting for etcd watch to start") 320 } 321 322 // Notify the observers of the current leader 323 e.onLeaderChange(leaderKV) 324 325 e.wg.Until(func(done chan struct{}) bool { 326 select { 327 case resp := <-watchChan: 328 if resp.Canceled { 329 e.onFatalErr(errors.New("remote server cancelled watch"), "during campaign watch") 330 return false 331 } 332 if err := resp.Err(); err != nil { 333 e.onFatalErr(err, "during campaign watch, remote server returned err") 334 return false 335 } 336 337 // Watch for changes in leadership 338 for _, event := range resp.Events { 339 if event.Type == etcd.EventTypeDelete || event.Type == etcd.EventTypePut { 340 // If the key is for our current leader 341 if bytes.Equal(event.Kv.Key, leaderKV.Key) { 342 // Check our leadership status 343 resp, err := e.getLeader(e.ctx) 344 if err != nil { 345 e.onFatalErr(err, "while querying for new leader") 346 return false 347 } 348 349 // If we have no leader 350 if resp == nil { 351 e.onFatalErr(err, "After etcd event no leader was found, restarting election") 352 return false 353 } 354 // Notify if leadership has changed 355 if !bytes.Equal(resp.Key, leaderKV.Key) { 356 leaderKV = resp 357 e.onLeaderChange(leaderKV) 358 } 359 } 360 } 361 } 362 case <-done: 363 _ = watcher.Close() 364 // If withdraw takes longer than our TTL then lease is expired 365 // and we are no longer leader anyway. 366 ctx, cancel := context.WithTimeout(context.Background(), e.ttl) 367 368 // Withdraw our candidacy because of shutdown 369 if err := e.withDrawCampaign(ctx); err != nil { 370 e.onErr(err, "") 371 } 372 e.onLeaderChange(&mvccpb.KeyValue{}) 373 cancel() 374 return false 375 } 376 return true 377 }) 378 return nil 379 } 380 381 func (e *Election) onLeaderChange(kv *mvccpb.KeyValue) { 382 event := ElectionEvent{} 383 if kv != nil { 384 if string(kv.Key) == e.key { 385 atomic.StoreInt32(&e.isLeader, 1) 386 event.IsLeader = true 387 } else { 388 atomic.StoreInt32(&e.isLeader, 0) 389 } 390 event.LeaderKey = string(kv.Key) 391 event.LeaderData = string(kv.Value) 392 } else { 393 event.IsDone = true 394 } 395 if e.observer != nil { 396 e.observer(event) 397 } 398 } 399 400 // onErr reports errors the the observer 401 func (e *Election) onErr(err error, msg string) { 402 atomic.StoreInt32(&e.isLeader, 0) 403 if msg != "" { 404 err = errors.Wrap(err, msg) 405 } 406 if e.observer != nil { 407 e.observer(ElectionEvent{Err: err}) 408 } 409 } 410 411 // onFatalErr reports errors to the observer and resets the election and session 412 func (e *Election) onFatalErr(err error, msg string) { 413 e.onErr(err, msg) 414 // We call this in a go routine to avoid blocking on `Stop()` calls 415 go e.session.Reset() 416 } 417 418 // Close cancels the election and concedes the election if we are leader 419 func (e *Election) Close() { 420 e.session.Close() 421 e.wg.Wait() 422 // Emit the `Done:true` event 423 e.onLeaderChange(nil) 424 } 425 426 // IsLeader returns true if we are leader. It only makes sense if the election 427 // was created with NewElection that block until the initial election is over. 428 func (e *Election) IsLeader() bool { 429 return atomic.LoadInt32(&e.isLeader) == 1 430 } 431 432 // Concede concedes leadership if we are leader and restarts the campaign returns true. 433 // if we are not leader do nothing and return false. If you want to concede leadership 434 // and cancel the campaign call Close() instead. 435 func (e *Election) Concede() (bool, error) { 436 isLeader := atomic.LoadInt32(&e.isLeader) 437 if isLeader == 0 { 438 return false, nil 439 } 440 oldCampaignKey := e.key 441 e.session.Reset() 442 443 // Ensure there are no lingering candidates 444 ctx, cancel := context.WithTimeout(context.Background(), e.ttl) 445 cancel() 446 447 _, err := e.client.Delete(ctx, oldCampaignKey) 448 if err != nil { 449 return true, errors.Wrapf(err, "while cleaning up campaign '%s'", oldCampaignKey) 450 } 451 452 return true, nil 453 } 454 455 type AlwaysLeaderMock struct{} 456 457 func (s *AlwaysLeaderMock) IsLeader() bool { return true } 458 func (s *AlwaysLeaderMock) Concede() (bool, error) { return true, nil } 459 func (s *AlwaysLeaderMock) Close() {}