github.com/makyo/juju@v0.0.0-20160425123129-2608902037e9/worker/leadership/tracker.go (about)

     1  // Copyright 2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package leadership
     5  
     6  import (
     7  	"time"
     8  
     9  	"github.com/juju/errors"
    10  	"github.com/juju/loggo"
    11  	"github.com/juju/names"
    12  	"launchpad.net/tomb"
    13  
    14  	"github.com/juju/juju/core/leadership"
    15  )
    16  
// logger is the package-level logger; everything in this file logs under the
// "juju.worker.leadership" name.
var logger = loggo.GetLogger("juju.worker.leadership")
    18  
// Tracker is a worker that tries to claim, and then keep, service leadership
// for a single unit, and that hands out tickets through which clients can
// check for or wait on changes in that unit's leadership status. It is
// created and started by NewTracker.
type Tracker struct {
	// tomb tracks the lifetime of the goroutine started by NewTracker.
	tomb        tomb.Tomb
	// claimer is used to make leadership claims and to block until the
	// current leadership is released.
	claimer     leadership.Claimer
	// unitName is the id of the unit on whose behalf leadership is claimed.
	unitName    string
	// serviceName is the service whose leadership is being claimed.
	serviceName string
	// duration is the value supplied to NewTracker; claims are made for
	// twice this long and renewed once this long before they would expire.
	duration    time.Duration
	// isMinion records whether the most recent claim was denied.
	isMinion    bool

	// claimLease is closed when the wait for leadership release started by
	// setMinion finishes; it is nil whenever no such wait is pending.
	claimLease        chan struct{}
	// renewLease fires when a held lease should be renewed; it is nil
	// whenever the Tracker is not scheduled to renew.
	renewLease        <-chan time.Time
	// claimTickets, waitLeaderTickets and waitMinionTickets carry freshly
	// submitted ticket channels from submit to the main loop.
	claimTickets      chan chan bool
	waitLeaderTickets chan chan bool
	waitMinionTickets chan chan bool
	// waitingLeader holds tickets to be notified by the next setLeader.
	waitingLeader     []chan bool
	// waitingMinion holds tickets to be notified by the next setMinion.
	waitingMinion     []chan bool
}
    35  
    36  // NewTracker returns a *Tracker that attempts to claim and retain service
    37  // leadership for the supplied unit. It will claim leadership for twice the
    38  // supplied duration, and once it's leader it will renew leadership every
    39  // time the duration elapses.
    40  // Thus, successful leadership claims on the resulting Tracker will guarantee
    41  // leadership for the duration supplied here without generating additional
    42  // calls to the supplied manager (which may very well be on the other side of
    43  // a network connection).
    44  func NewTracker(tag names.UnitTag, claimer leadership.Claimer, duration time.Duration) *Tracker {
    45  	unitName := tag.Id()
    46  	serviceName, _ := names.UnitService(unitName)
    47  	t := &Tracker{
    48  		unitName:          unitName,
    49  		serviceName:       serviceName,
    50  		claimer:           claimer,
    51  		duration:          duration,
    52  		claimTickets:      make(chan chan bool),
    53  		waitLeaderTickets: make(chan chan bool),
    54  		waitMinionTickets: make(chan chan bool),
    55  	}
    56  	go func() {
    57  		defer t.tomb.Done()
    58  		defer func() {
    59  			for _, ticketCh := range t.waitingLeader {
    60  				close(ticketCh)
    61  			}
    62  			for _, ticketCh := range t.waitingMinion {
    63  				close(ticketCh)
    64  			}
    65  		}()
    66  		err := t.loop()
    67  		// TODO: jam 2015-04-02 is this the most elegant way to make
    68  		// sure we shutdown cleanly? Essentially the lowest level sees
    69  		// that we are dying, and propagates an ErrDying up to us so
    70  		// that we shut down, which we then are passing back into
    71  		// Tomb.Kill().
    72  		// Tomb.Kill() special cases the exact object ErrDying, and has
    73  		// no idea about errors.Cause and the general errors.Trace
    74  		// mechanisms that we use.
    75  		// So we explicitly unwrap before calling tomb.Kill() else
    76  		// tomb.Stop() thinks that we have a genuine error.
    77  		switch cause := errors.Cause(err); cause {
    78  		case tomb.ErrDying:
    79  			err = cause
    80  		}
    81  		t.tomb.Kill(err)
    82  	}()
    83  	return t
    84  }
    85  
// Kill is part of the worker.Worker interface. It asks the Tracker to stop
// by killing its tomb with a nil error; use Wait to find out when it has
// actually stopped.
func (t *Tracker) Kill() {
	t.tomb.Kill(nil)
}
    90  
// Wait is part of the worker.Worker interface. It returns whatever the
// Tracker's tomb reports from its own Wait.
func (t *Tracker) Wait() error {
	return t.tomb.Wait()
}
    95  
// ServiceName is part of the leadership.Tracker interface. It returns the
// name of the service whose leadership this Tracker claims.
func (t *Tracker) ServiceName() string {
	return t.serviceName
}
   100  
// ClaimDuration is part of the leadership.Tracker interface. It returns the
// duration the Tracker was created with.
func (t *Tracker) ClaimDuration() time.Duration {
	return t.duration
}
   105  
// ClaimLeader is part of the leadership.Tracker interface. It submits a
// ticket on the claim queue; the main loop resolves it via resolveClaim.
func (t *Tracker) ClaimLeader() leadership.Ticket {
	return t.submit(t.claimTickets)
}
   110  
// WaitLeader is part of the leadership.Tracker interface. It submits a
// ticket on the wait-for-leadership queue; the main loop resolves it via
// resolveWaitLeader.
func (t *Tracker) WaitLeader() leadership.Ticket {
	return t.submit(t.waitLeaderTickets)
}
   115  
// WaitMinion is part of the leadership.Tracker interface. It submits a
// ticket on the wait-for-leadership-loss queue; the main loop resolves it
// via resolveWaitMinion.
func (t *Tracker) WaitMinion() leadership.Ticket {
	return t.submit(t.waitMinionTickets)
}
   120  
   121  func (t *Tracker) loop() error {
   122  	logger.Debugf("%s making initial claim for %s leadership", t.unitName, t.serviceName)
   123  	if err := t.refresh(); err != nil {
   124  		return errors.Trace(err)
   125  	}
   126  	for {
   127  		select {
   128  		case <-t.tomb.Dying():
   129  			return tomb.ErrDying
   130  		case <-t.claimLease:
   131  			logger.Debugf("%s claiming lease for %s leadership", t.unitName, t.serviceName)
   132  			t.claimLease = nil
   133  			if err := t.refresh(); err != nil {
   134  				return errors.Trace(err)
   135  			}
   136  		case <-t.renewLease:
   137  			logger.Debugf("%s renewing lease for %s leadership", t.unitName, t.serviceName)
   138  			t.renewLease = nil
   139  			if err := t.refresh(); err != nil {
   140  				return errors.Trace(err)
   141  			}
   142  		case ticketCh := <-t.claimTickets:
   143  			logger.Debugf("%s got claim request for %s leadership", t.unitName, t.serviceName)
   144  			if err := t.resolveClaim(ticketCh); err != nil {
   145  				return errors.Trace(err)
   146  			}
   147  		case ticketCh := <-t.waitLeaderTickets:
   148  			logger.Debugf("%s got wait request for %s leadership", t.unitName, t.serviceName)
   149  			if err := t.resolveWaitLeader(ticketCh); err != nil {
   150  				return errors.Trace(err)
   151  			}
   152  		case ticketCh := <-t.waitMinionTickets:
   153  			logger.Debugf("%s got wait request for %s leadership loss", t.unitName, t.serviceName)
   154  			if err := t.resolveWaitMinion(ticketCh); err != nil {
   155  				return errors.Trace(err)
   156  			}
   157  		}
   158  	}
   159  }
   160  
   161  // refresh makes a leadership request, and updates Tracker state to conform to
   162  // latest known reality.
   163  func (t *Tracker) refresh() error {
   164  	logger.Debugf("checking %s for %s leadership", t.unitName, t.serviceName)
   165  	leaseDuration := 2 * t.duration
   166  	// TODO(fwereade): 2016-03-17 lp:1558657
   167  	untilTime := time.Now().Add(leaseDuration)
   168  	err := t.claimer.ClaimLeadership(t.serviceName, t.unitName, leaseDuration)
   169  	switch {
   170  	case err == nil:
   171  		return t.setLeader(untilTime)
   172  	case errors.Cause(err) == leadership.ErrClaimDenied:
   173  		return t.setMinion()
   174  	}
   175  	return errors.Annotatef(err, "leadership failure")
   176  }
   177  
   178  // setLeader arranges for lease renewal.
   179  func (t *Tracker) setLeader(untilTime time.Time) error {
   180  	logger.Debugf("%s confirmed for %s leadership until %s", t.unitName, t.serviceName, untilTime)
   181  	renewTime := untilTime.Add(-t.duration)
   182  	logger.Infof("%s will renew %s leadership at %s", t.unitName, t.serviceName, renewTime)
   183  	t.isMinion = false
   184  	t.claimLease = nil
   185  	// TODO(fwereade): 2016-03-17 lp:1558657
   186  	t.renewLease = time.After(renewTime.Sub(time.Now()))
   187  
   188  	for len(t.waitingLeader) > 0 {
   189  		logger.Debugf("notifying %s ticket of impending %s leadership", t.unitName, t.serviceName)
   190  		var ticketCh chan bool
   191  		ticketCh, t.waitingLeader = t.waitingLeader[0], t.waitingLeader[1:]
   192  		defer close(ticketCh)
   193  		if err := t.sendTrue(ticketCh); err != nil {
   194  			return errors.Trace(err)
   195  		}
   196  	}
   197  	return nil
   198  }
   199  
   200  // setMinion arranges for lease acquisition when there's an opportunity.
   201  func (t *Tracker) setMinion() error {
   202  	logger.Infof("%s leadership for %s denied", t.serviceName, t.unitName)
   203  	t.isMinion = true
   204  	t.renewLease = nil
   205  	if t.claimLease == nil {
   206  		t.claimLease = make(chan struct{})
   207  		go func() {
   208  			defer close(t.claimLease)
   209  			logger.Debugf("%s waiting for %s leadership release", t.unitName, t.serviceName)
   210  			err := t.claimer.BlockUntilLeadershipReleased(t.serviceName)
   211  			if err != nil {
   212  				logger.Warningf("error while %s waiting for %s leadership release: %v", t.unitName, t.serviceName, err)
   213  			}
   214  			// We don't need to do anything else with the error, because we just
   215  			// close the claimLease channel and trigger a leadership claim on the
   216  			// main loop; if anything's gone seriously wrong we'll find out right
   217  			// away and shut down anyway. (And if this goroutine outlives the
   218  			// Tracker, it keeps it around as a zombie, but I don't see a way
   219  			// around that...)
   220  		}()
   221  	}
   222  
   223  	for len(t.waitingMinion) > 0 {
   224  		logger.Debugf("notifying %s ticket of impending loss of %s leadership", t.unitName, t.serviceName)
   225  		var ticketCh chan bool
   226  		ticketCh, t.waitingMinion = t.waitingMinion[0], t.waitingMinion[1:]
   227  		defer close(ticketCh)
   228  		if err := t.sendTrue(ticketCh); err != nil {
   229  			return errors.Trace(err)
   230  		}
   231  	}
   232  	return nil
   233  }
   234  
   235  // isLeader returns true if leadership is guaranteed for the Tracker's duration.
   236  func (t *Tracker) isLeader() (bool, error) {
   237  	if !t.isMinion {
   238  		// Last time we looked, we were leader.
   239  		select {
   240  		case <-t.tomb.Dying():
   241  			return false, errors.Trace(tomb.ErrDying)
   242  		case <-t.renewLease:
   243  			logger.Debugf("%s renewing lease for %s leadership", t.unitName, t.serviceName)
   244  			t.renewLease = nil
   245  			if err := t.refresh(); err != nil {
   246  				return false, errors.Trace(err)
   247  			}
   248  		default:
   249  			logger.Debugf("%s still has %s leadership", t.unitName, t.serviceName)
   250  		}
   251  	}
   252  	return !t.isMinion, nil
   253  }
   254  
   255  // resolveClaim will send true on the supplied channel if leadership can be
   256  // successfully verified, and will always close it whether or not it sent.
   257  func (t *Tracker) resolveClaim(ticketCh chan bool) error {
   258  	logger.Debugf("resolving %s leadership ticket for %s...", t.serviceName, t.unitName)
   259  	defer close(ticketCh)
   260  	if leader, err := t.isLeader(); err != nil {
   261  		return errors.Trace(err)
   262  	} else if !leader {
   263  		logger.Debugf("%s is not %s leader", t.unitName, t.serviceName)
   264  		return nil
   265  	}
   266  	logger.Debugf("confirming %s leadership for %s", t.serviceName, t.unitName)
   267  	return t.sendTrue(ticketCh)
   268  }
   269  
   270  // resolveWaitLeader will send true on the supplied channel if leadership can be
   271  // guaranteed for the Tracker's duration. It will then close the channel. If
   272  // leadership cannot be guaranteed, the channel is left untouched until either
   273  // the termination of the Tracker or the next invocation of setLeader; at which
   274  // point true is sent if applicable, and the channel is closed.
   275  func (t *Tracker) resolveWaitLeader(ticketCh chan bool) error {
   276  	var dontClose bool
   277  	defer func() {
   278  		if !dontClose {
   279  			close(ticketCh)
   280  		}
   281  	}()
   282  
   283  	if leader, err := t.isLeader(); err != nil {
   284  		return errors.Trace(err)
   285  	} else if leader {
   286  		logger.Debugf("reporting %s leadership for %s", t.serviceName, t.unitName)
   287  		return t.sendTrue(ticketCh)
   288  	}
   289  
   290  	logger.Debugf("waiting for %s to attain %s leadership", t.unitName, t.serviceName)
   291  	t.waitingLeader = append(t.waitingLeader, ticketCh)
   292  	dontClose = true
   293  	return nil
   294  }
   295  
   296  // resolveWaitMinion will close the supplied channel as soon as leadership cannot
   297  // be guaranteed beyond the Tracker's duration.
   298  func (t *Tracker) resolveWaitMinion(ticketCh chan bool) error {
   299  	var dontClose bool
   300  	defer func() {
   301  		if !dontClose {
   302  			close(ticketCh)
   303  		}
   304  	}()
   305  
   306  	if leader, err := t.isLeader(); err != nil {
   307  		return errors.Trace(err)
   308  	} else if leader {
   309  		logger.Debugf("waiting for %s to lose %s leadership", t.unitName, t.serviceName)
   310  		t.waitingMinion = append(t.waitingMinion, ticketCh)
   311  		dontClose = true
   312  	} else {
   313  		logger.Debugf("reporting %s leadership loss for %s", t.serviceName, t.unitName)
   314  	}
   315  	return nil
   316  
   317  }
   318  
   319  func (t *Tracker) sendTrue(ticketCh chan bool) error {
   320  	select {
   321  	case <-t.tomb.Dying():
   322  		return tomb.ErrDying
   323  	case ticketCh <- true:
   324  		return nil
   325  	}
   326  }
   327  
   328  func (t *Tracker) submit(tickets chan chan bool) leadership.Ticket {
   329  	ticketCh := make(chan bool, 1)
   330  	select {
   331  	case <-t.tomb.Dying():
   332  		close(ticketCh)
   333  	case tickets <- ticketCh:
   334  	}
   335  	ticket := &ticket{
   336  		ch:    ticketCh,
   337  		ready: make(chan struct{}),
   338  	}
   339  	go ticket.run()
   340  	return ticket
   341  }
   342  
   343  // ticket is used by Tracker to communicate leadership status back to a client.
   344  type ticket struct {
   345  	ch      chan bool
   346  	ready   chan struct{}
   347  	success bool
   348  }
   349  
   350  func (t *ticket) run() {
   351  	defer close(t.ready)
   352  	// This is only safe/sane because the Tracker promises to close all pending
   353  	// ticket channels when it shuts down.
   354  	if <-t.ch {
   355  		t.success = true
   356  	}
   357  }
   358  
   359  // Ready is part of the leadership.Ticket interface.
   360  func (t *ticket) Ready() <-chan struct{} {
   361  	return t.ready
   362  }
   363  
   364  // Wait is part of the leadership.Ticket interface.
   365  func (t *ticket) Wait() bool {
   366  	<-t.ready
   367  	return t.success
   368  }