github.com/mhilton/juju-juju@v0.0.0-20150901100907-a94dd2c73455/worker/leadership/tracker.go (about)

     1  // Copyright 2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package leadership
     5  
     6  import (
     7  	"time"
     8  
     9  	"github.com/juju/errors"
    10  	"github.com/juju/loggo"
    11  	"github.com/juju/names"
    12  	"launchpad.net/tomb"
    13  
    14  	"github.com/juju/juju/leadership"
    15  )
    16  
// logger is the package-level logger; it is registered under the
// "juju.worker.leadership" name.
var logger = loggo.GetLogger("juju.worker.leadership")
    18  
    19  // tracker implements TrackerWorker.
    20  type tracker struct {
    21  	tomb        tomb.Tomb
    22  	claimer     leadership.Claimer
    23  	unitName    string
    24  	serviceName string
    25  	duration    time.Duration
    26  	isMinion    bool
    27  
    28  	claimLease        chan struct{}
    29  	renewLease        <-chan time.Time
    30  	claimTickets      chan chan bool
    31  	waitLeaderTickets chan chan bool
    32  	waitMinionTickets chan chan bool
    33  	waitingLeader     []chan bool
    34  	waitingMinion     []chan bool
    35  }
    36  
    37  // NewTrackerWorker returns a TrackerWorker that attempts to claim and retain
    38  // service leadership for the supplied unit. It will claim leadership for twice
    39  // the supplied duration, and once it's leader it will renew leadership every
    40  // time the duration elapses.
    41  // Thus, successful leadership claims on the resulting Tracker will guarantee
    42  // leadership for the duration supplied here without generating additional calls
    43  // to the supplied manager (which may very well be on the other side of a
    44  // network connection).
    45  func NewTrackerWorker(tag names.UnitTag, claimer leadership.Claimer, duration time.Duration) TrackerWorker {
    46  	unitName := tag.Id()
    47  	serviceName, _ := names.UnitService(unitName)
    48  	t := &tracker{
    49  		unitName:          unitName,
    50  		serviceName:       serviceName,
    51  		claimer:           claimer,
    52  		duration:          duration,
    53  		claimTickets:      make(chan chan bool),
    54  		waitLeaderTickets: make(chan chan bool),
    55  		waitMinionTickets: make(chan chan bool),
    56  	}
    57  	go func() {
    58  		defer t.tomb.Done()
    59  		defer func() {
    60  			for _, ticketCh := range t.waitingLeader {
    61  				close(ticketCh)
    62  			}
    63  			for _, ticketCh := range t.waitingMinion {
    64  				close(ticketCh)
    65  			}
    66  		}()
    67  		err := t.loop()
    68  		// TODO: jam 2015-04-02 is this the most elegant way to make
    69  		// sure we shutdown cleanly? Essentially the lowest level sees
    70  		// that we are dying, and propagates an ErrDying up to us so
    71  		// that we shut down, which we then are passing back into
    72  		// Tomb.Kill().
    73  		// Tomb.Kill() special cases the exact object ErrDying, and has
    74  		// no idea about errors.Cause and the general errors.Trace
    75  		// mechanisms that we use.
    76  		// So we explicitly unwrap before calling tomb.Kill() else
    77  		// tomb.Stop() thinks that we have a genuine error.
    78  		switch cause := errors.Cause(err); cause {
    79  		case tomb.ErrDying:
    80  			err = cause
    81  		}
    82  		t.tomb.Kill(err)
    83  	}()
    84  	return t
    85  }
    86  
    87  // Kill is part of the worker.Worker interface.
    88  func (t *tracker) Kill() {
    89  	t.tomb.Kill(nil)
    90  }
    91  
    92  // Wait is part of the worker.Worker interface.
    93  func (t *tracker) Wait() error {
    94  	return t.tomb.Wait()
    95  }
    96  
    97  // ServiceName is part of the Tracker interface.
    98  func (t *tracker) ServiceName() string {
    99  	return t.serviceName
   100  }
   101  
   102  // ClaimDuration is part of the Tracker interface.
   103  func (t *tracker) ClaimDuration() time.Duration {
   104  	return t.duration
   105  }
   106  
   107  // ClaimLeader is part of the Tracker interface.
   108  func (t *tracker) ClaimLeader() Ticket {
   109  	return t.submit(t.claimTickets)
   110  }
   111  
   112  // WaitLeader is part of the Tracker interface.
   113  func (t *tracker) WaitLeader() Ticket {
   114  	return t.submit(t.waitLeaderTickets)
   115  }
   116  
   117  // WaitMinion is part of the Tracker interface.
   118  func (t *tracker) WaitMinion() Ticket {
   119  	return t.submit(t.waitMinionTickets)
   120  }
   121  
   122  func (t *tracker) loop() error {
   123  	logger.Debugf("%s making initial claim for %s leadership", t.unitName, t.serviceName)
   124  	if err := t.refresh(); err != nil {
   125  		return errors.Trace(err)
   126  	}
   127  	for {
   128  		select {
   129  		case <-t.tomb.Dying():
   130  			return tomb.ErrDying
   131  		case <-t.claimLease:
   132  			logger.Debugf("%s claiming lease for %s leadership", t.unitName, t.serviceName)
   133  			t.claimLease = nil
   134  			if err := t.refresh(); err != nil {
   135  				return errors.Trace(err)
   136  			}
   137  		case <-t.renewLease:
   138  			logger.Debugf("%s renewing lease for %s leadership", t.unitName, t.serviceName)
   139  			t.renewLease = nil
   140  			if err := t.refresh(); err != nil {
   141  				return errors.Trace(err)
   142  			}
   143  		case ticketCh := <-t.claimTickets:
   144  			logger.Debugf("%s got claim request for %s leadership", t.unitName, t.serviceName)
   145  			if err := t.resolveClaim(ticketCh); err != nil {
   146  				return errors.Trace(err)
   147  			}
   148  		case ticketCh := <-t.waitLeaderTickets:
   149  			logger.Debugf("%s got wait request for %s leadership", t.unitName, t.serviceName)
   150  			if err := t.resolveWaitLeader(ticketCh); err != nil {
   151  				return errors.Trace(err)
   152  			}
   153  		case ticketCh := <-t.waitMinionTickets:
   154  			logger.Debugf("%s got wait request for %s leadership loss", t.unitName, t.serviceName)
   155  			if err := t.resolveWaitMinion(ticketCh); err != nil {
   156  				return errors.Trace(err)
   157  			}
   158  		}
   159  	}
   160  }
   161  
   162  // refresh makes a leadership request, and updates tracker state to conform to
   163  // latest known reality.
   164  func (t *tracker) refresh() error {
   165  	logger.Debugf("checking %s for %s leadership", t.unitName, t.serviceName)
   166  	leaseDuration := 2 * t.duration
   167  	untilTime := time.Now().Add(leaseDuration)
   168  	err := t.claimer.ClaimLeadership(t.serviceName, t.unitName, leaseDuration)
   169  	switch {
   170  	case err == nil:
   171  		return t.setLeader(untilTime)
   172  	case errors.Cause(err) == leadership.ErrClaimDenied:
   173  		return t.setMinion()
   174  	}
   175  	return errors.Annotatef(err, "leadership failure")
   176  }
   177  
   178  // setLeader arranges for lease renewal.
   179  func (t *tracker) setLeader(untilTime time.Time) error {
   180  	logger.Debugf("%s confirmed for %s leadership until %s", t.unitName, t.serviceName, untilTime)
   181  	renewTime := untilTime.Add(-t.duration)
   182  	logger.Infof("%s will renew %s leadership at %s", t.unitName, t.serviceName, renewTime)
   183  	t.isMinion = false
   184  	t.claimLease = nil
   185  	t.renewLease = time.After(renewTime.Sub(time.Now()))
   186  
   187  	for len(t.waitingLeader) > 0 {
   188  		logger.Debugf("notifying %s ticket of impending %s leadership", t.unitName, t.serviceName)
   189  		var ticketCh chan bool
   190  		ticketCh, t.waitingLeader = t.waitingLeader[0], t.waitingLeader[1:]
   191  		defer close(ticketCh)
   192  		if err := t.sendTrue(ticketCh); err != nil {
   193  			return errors.Trace(err)
   194  		}
   195  	}
   196  	return nil
   197  }
   198  
   199  // setMinion arranges for lease acquisition when there's an opportunity.
   200  func (t *tracker) setMinion() error {
   201  	logger.Infof("%s leadership for %s denied", t.serviceName, t.unitName)
   202  	t.isMinion = true
   203  	t.renewLease = nil
   204  	if t.claimLease == nil {
   205  		t.claimLease = make(chan struct{})
   206  		go func() {
   207  			defer close(t.claimLease)
   208  			logger.Debugf("%s waiting for %s leadership release", t.unitName, t.serviceName)
   209  			err := t.claimer.BlockUntilLeadershipReleased(t.serviceName)
   210  			if err != nil {
   211  				logger.Warningf("error while %s waiting for %s leadership release: %v", t.unitName, t.serviceName, err)
   212  			}
   213  			// We don't need to do anything else with the error, because we just
   214  			// close the claimLease channel and trigger a leadership claim on the
   215  			// main loop; if anything's gone seriously wrong we'll find out right
   216  			// away and shut down anyway. (And if this goroutine outlives the
   217  			// tracker, it keeps it around as a zombie, but I don't see a way
   218  			// around that...)
   219  		}()
   220  	}
   221  
   222  	for len(t.waitingMinion) > 0 {
   223  		logger.Debugf("notifying %s ticket of impending loss of %s leadership", t.unitName, t.serviceName)
   224  		var ticketCh chan bool
   225  		ticketCh, t.waitingMinion = t.waitingMinion[0], t.waitingMinion[1:]
   226  		defer close(ticketCh)
   227  		if err := t.sendTrue(ticketCh); err != nil {
   228  			return errors.Trace(err)
   229  		}
   230  	}
   231  	return nil
   232  }
   233  
   234  // isLeader returns true if leadership is guaranteed for the tracker's duration.
   235  func (t *tracker) isLeader() (bool, error) {
   236  	if !t.isMinion {
   237  		// Last time we looked, we were leader.
   238  		select {
   239  		case <-t.tomb.Dying():
   240  			return false, errors.Trace(tomb.ErrDying)
   241  		case <-t.renewLease:
   242  			logger.Debugf("%s renewing lease for %s leadership", t.unitName, t.serviceName)
   243  			t.renewLease = nil
   244  			if err := t.refresh(); err != nil {
   245  				return false, errors.Trace(err)
   246  			}
   247  		default:
   248  			logger.Debugf("%s still has %s leadership", t.unitName, t.serviceName)
   249  		}
   250  	}
   251  	return !t.isMinion, nil
   252  }
   253  
   254  // resolveClaim will send true on the supplied channel if leadership can be
   255  // successfully verified, and will always close it whether or not it sent.
   256  func (t *tracker) resolveClaim(ticketCh chan bool) error {
   257  	logger.Debugf("resolving %s leadership ticket for %s...", t.serviceName, t.unitName)
   258  	defer close(ticketCh)
   259  	if leader, err := t.isLeader(); err != nil {
   260  		return errors.Trace(err)
   261  	} else if !leader {
   262  		logger.Debugf("%s is not %s leader", t.unitName, t.serviceName)
   263  		return nil
   264  	}
   265  	logger.Debugf("confirming %s leadership for %s", t.serviceName, t.unitName)
   266  	return t.sendTrue(ticketCh)
   267  }
   268  
   269  // resolveWaitLeader will send true on the supplied channel if leadership can be
   270  // guaranteed for the tracker's duration. It will then close the channel. If
   271  // leadership cannot be guaranteed, the channel is left untouched until either
   272  // the termination of the tracker or the next invocation of setLeader; at which
   273  // point true is sent if applicable, and the channel is closed.
   274  func (t *tracker) resolveWaitLeader(ticketCh chan bool) error {
   275  	var dontClose bool
   276  	defer func() {
   277  		if !dontClose {
   278  			close(ticketCh)
   279  		}
   280  	}()
   281  
   282  	if leader, err := t.isLeader(); err != nil {
   283  		return errors.Trace(err)
   284  	} else if leader {
   285  		logger.Debugf("reporting %s leadership for %s", t.serviceName, t.unitName)
   286  		return t.sendTrue(ticketCh)
   287  	}
   288  
   289  	logger.Debugf("waiting for %s to attain %s leadership", t.unitName, t.serviceName)
   290  	t.waitingLeader = append(t.waitingLeader, ticketCh)
   291  	dontClose = true
   292  	return nil
   293  }
   294  
   295  // resolveWaitMinion will close the supplied channel as soon as leadership cannot
   296  // be guaranteed beyond the tracker's duration.
   297  func (t *tracker) resolveWaitMinion(ticketCh chan bool) error {
   298  	var dontClose bool
   299  	defer func() {
   300  		if !dontClose {
   301  			close(ticketCh)
   302  		}
   303  	}()
   304  
   305  	if leader, err := t.isLeader(); err != nil {
   306  		return errors.Trace(err)
   307  	} else if leader {
   308  		logger.Debugf("waiting for %s to lose %s leadership", t.unitName, t.serviceName)
   309  		t.waitingMinion = append(t.waitingMinion, ticketCh)
   310  		dontClose = true
   311  	} else {
   312  		logger.Debugf("reporting %s leadership loss for %s", t.serviceName, t.unitName)
   313  	}
   314  	return nil
   315  
   316  }
   317  
   318  func (t *tracker) sendTrue(ticketCh chan bool) error {
   319  	select {
   320  	case <-t.tomb.Dying():
   321  		return tomb.ErrDying
   322  	case ticketCh <- true:
   323  		return nil
   324  	}
   325  }
   326  
   327  func (t *tracker) submit(tickets chan chan bool) Ticket {
   328  	ticketCh := make(chan bool, 1)
   329  	select {
   330  	case <-t.tomb.Dying():
   331  		close(ticketCh)
   332  	case tickets <- ticketCh:
   333  	}
   334  	ticket := &ticket{
   335  		ch:    ticketCh,
   336  		ready: make(chan struct{}),
   337  	}
   338  	go ticket.run()
   339  	return ticket
   340  }
   341  
   342  // ticket is used with tracker to communicate leadership status back to a client.
   343  type ticket struct {
   344  	ch      chan bool
   345  	ready   chan struct{}
   346  	success bool
   347  }
   348  
   349  func (t *ticket) run() {
   350  	defer close(t.ready)
   351  	// This is only safe/sane because the tracker promises to close all pending
   352  	// ticket channels when it shuts down.
   353  	if <-t.ch {
   354  		t.success = true
   355  	}
   356  }
   357  
   358  // Ready is part of the Ticket interface.
   359  func (t *ticket) Ready() <-chan struct{} {
   360  	return t.ready
   361  }
   362  
   363  // Wait is part of the Ticket interface.
   364  func (t *ticket) Wait() bool {
   365  	<-t.ready
   366  	return t.success
   367  }