github.com/Pankov404/juju@v0.0.0-20150703034450-be266991dceb/state/leadership/manager.go (about)

     1  // Copyright 2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package leadership
     5  
     6  import (
     7  	"sort"
     8  	"time"
     9  
    10  	"github.com/juju/errors"
    11  	"github.com/juju/loggo"
    12  	"launchpad.net/tomb"
    13  
    14  	"github.com/juju/juju/state/lease"
    15  )
    16  
// logger is the package-level logger, registered under the name
// "juju.state.leadership".
var logger = loggo.GetLogger("juju.state.leadership")
    18  
    19  // NewManager returns a Manager implementation, backed by a lease.Client,
    20  // which (in addition to its exposed Manager capabilities) will expire all
    21  // known leases as they run out. The caller takes responsibility for killing,
    22  // and handling errors from, the returned Worker.
    23  func NewManager(config ManagerConfig) (ManagerWorker, error) {
    24  	if err := config.Validate(); err != nil {
    25  		return nil, errors.Trace(err)
    26  	}
    27  	manager := &manager{
    28  		config: config,
    29  		claims: make(chan claim),
    30  		checks: make(chan check),
    31  		blocks: make(chan block),
    32  	}
    33  	go func() {
    34  		defer manager.tomb.Done()
    35  		// note: we don't directly tomb.Kill, because we may need to
    36  		// unwrap tomb.ErrDying in order to function correctly.
    37  		manager.kill(manager.loop())
    38  	}()
    39  	return manager, nil
    40  }
    41  
// manager implements ManagerWorker. All requests are funnelled through
// unbuffered channels to a single loop goroutine started by NewManager.
type manager struct {
	// tomb tracks the lifetime of the loop goroutine; Kill and Wait
	// delegate to it.
	tomb tomb.Tomb

	// config collects all external configuration and dependencies.
	config ManagerConfig

	// claims is used to deliver leadership claim requests to the loop.
	claims chan claim

	// checks is used to deliver leadership check requests to the loop.
	checks chan check

	// blocks is used to deliver leaderlessness block requests to the loop.
	blocks chan block
}
    58  
    59  // Kill is part of the worker.Worker interface.
    60  func (manager *manager) Kill() {
    61  	manager.kill(nil)
    62  }
    63  
    64  // kill unwraps tomb.ErrDying before killing the tomb, thus allowing the worker
    65  // to use errors.Trace liberally and still stop cleanly.
    66  func (manager *manager) kill(err error) {
    67  	if errors.Cause(err) == tomb.ErrDying {
    68  		err = tomb.ErrDying
    69  	} else if err != nil {
    70  		logger.Errorf("stopping leadership manager with error: %v", err)
    71  	}
    72  	manager.tomb.Kill(err)
    73  }
    74  
    75  // Wait is part of the worker.Worker interface.
    76  func (manager *manager) Wait() error {
    77  	return manager.tomb.Wait()
    78  }
    79  
    80  // loop runs until the manager is stopped.
    81  func (manager *manager) loop() error {
    82  	blocks := make(blocks)
    83  	for {
    84  		if err := manager.choose(blocks); err != nil {
    85  			return errors.Trace(err)
    86  		}
    87  
    88  		leases := manager.config.Client.Leases()
    89  		for serviceName := range blocks {
    90  			if _, found := leases[serviceName]; !found {
    91  				blocks.unblock(serviceName)
    92  			}
    93  		}
    94  	}
    95  }
    96  
    97  // choose breaks the select out of loop to make the blocking logic clearer.
    98  func (manager *manager) choose(blocks blocks) error {
    99  	select {
   100  	case <-manager.tomb.Dying():
   101  		return tomb.ErrDying
   102  	case <-manager.nextExpiry():
   103  		return manager.expire()
   104  	case claim := <-manager.claims:
   105  		return manager.handleClaim(claim)
   106  	case check := <-manager.checks:
   107  		return manager.handleCheck(check)
   108  	case block := <-manager.blocks:
   109  		blocks.add(block)
   110  		return nil
   111  	}
   112  }
   113  
   114  // ClaimLeadership is part of the leadership.Manager interface.
   115  func (manager *manager) ClaimLeadership(serviceName, unitName string, duration time.Duration) error {
   116  	return claim{
   117  		serviceName: serviceName,
   118  		unitName:    unitName,
   119  		duration:    duration,
   120  		response:    make(chan bool),
   121  		abort:       manager.tomb.Dying(),
   122  	}.invoke(manager.claims)
   123  }
   124  
   125  // handleClaim processes and responds to the supplied claim. It will only return
   126  // unrecoverable errors; mere failure to claim just indicates a bad request, and
   127  // is communicated back to the claim's originator.
   128  func (manager *manager) handleClaim(claim claim) error {
   129  	client := manager.config.Client
   130  	request := lease.Request{claim.unitName, claim.duration}
   131  	err := lease.ErrInvalid
   132  	for err == lease.ErrInvalid {
   133  		select {
   134  		case <-manager.tomb.Dying():
   135  			return tomb.ErrDying
   136  		default:
   137  			info, found := client.Leases()[claim.serviceName]
   138  			switch {
   139  			case !found:
   140  				err = client.ClaimLease(claim.serviceName, request)
   141  			case info.Holder == claim.unitName:
   142  				err = client.ExtendLease(claim.serviceName, request)
   143  			default:
   144  				claim.respond(false)
   145  				return nil
   146  			}
   147  		}
   148  	}
   149  	if err != nil {
   150  		return errors.Trace(err)
   151  	}
   152  	claim.respond(true)
   153  	return nil
   154  }
   155  
   156  // CheckLeadership is part of the leadership.Manager interface.
   157  func (manager *manager) CheckLeadership(serviceName, unitName string) (Token, error) {
   158  	return check{
   159  		serviceName: serviceName,
   160  		unitName:    unitName,
   161  		response:    make(chan Token),
   162  		abort:       manager.tomb.Dying(),
   163  	}.invoke(manager.checks)
   164  }
   165  
   166  // handleCheck processes and responds to the supplied check. It will only return
   167  // unrecoverable errors; mere untruth of the assertion just indicates a bad
   168  // request, and is communicated back to the check's originator.
   169  func (manager *manager) handleCheck(check check) error {
   170  	client := manager.config.Client
   171  	info, found := client.Leases()[check.serviceName]
   172  	if !found || info.Holder != check.unitName {
   173  		if err := client.Refresh(); err != nil {
   174  			return errors.Trace(err)
   175  		}
   176  		info, found = client.Leases()[check.serviceName]
   177  	}
   178  	var result Token
   179  	if found && info.Holder == check.unitName {
   180  		result = token{info.AssertOp}
   181  	}
   182  	check.respond(result)
   183  	return nil
   184  }
   185  
   186  // BlockUntilLeadershipReleased is part of the leadership.Manager interface.
   187  func (manager *manager) BlockUntilLeadershipReleased(serviceName string) error {
   188  	return block{
   189  		serviceName: serviceName,
   190  		unblock:     make(chan struct{}),
   191  		abort:       manager.tomb.Dying(),
   192  	}.invoke(manager.blocks)
   193  }
   194  
   195  // nextExpiry returns a channel that will send a value at some point when we
   196  // expect at least one lease to be ready to expire. If no leases are known,
   197  // it will return nil.
   198  func (manager *manager) nextExpiry() <-chan time.Time {
   199  	var nextExpiry *time.Time
   200  	for _, info := range manager.config.Client.Leases() {
   201  		if nextExpiry != nil {
   202  			if info.Expiry.After(*nextExpiry) {
   203  				continue
   204  			}
   205  		}
   206  		nextExpiry = &info.Expiry
   207  	}
   208  	if nextExpiry == nil {
   209  		logger.Debugf("no leases recorded; never waking for expiry")
   210  		return nil
   211  	}
   212  	logger.Debugf("waking to expire leases at %s", *nextExpiry)
   213  	return manager.config.Clock.Alarm(*nextExpiry)
   214  }
   215  
   216  // expire will attempt to expire all leases that may have expired. There might
   217  // be none; they might have been extended or expired already by someone else; so
   218  // ErrInvalid is expected, and ignored, in the comfortable knowledge that the
   219  // client will have been updated and we'll see fresh info when we scan for new
   220  // expiries next time through the loop. It will return only unrecoverable errors.
   221  func (manager *manager) expire() error {
   222  	client := manager.config.Client
   223  	leases := client.Leases()
   224  
   225  	// Sort lease names so we expire in a predictable order for the tests.
   226  	names := make([]string, 0, len(leases))
   227  	for name := range leases {
   228  		names = append(names, name)
   229  	}
   230  	sort.Strings(names)
   231  	for _, name := range names {
   232  		now := manager.config.Clock.Now()
   233  		if leases[name].Expiry.After(now) {
   234  			continue
   235  		}
   236  		switch err := client.ExpireLease(name); err {
   237  		case nil, lease.ErrInvalid:
   238  		default:
   239  			return errors.Trace(err)
   240  		}
   241  	}
   242  	return nil
   243  }