github.com/mhilton/juju-juju@v0.0.0-20150901100907-a94dd2c73455/state/lease/client.go (about)

     1  // Copyright 2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package lease
     5  
     6  import (
     7  	"fmt"
     8  	"time"
     9  
    10  	"github.com/juju/errors"
    11  	"github.com/juju/loggo"
    12  	jujutxn "github.com/juju/txn"
    13  	"gopkg.in/mgo.v2"
    14  	"gopkg.in/mgo.v2/bson"
    15  	"gopkg.in/mgo.v2/txn"
    16  
    17  	"github.com/juju/juju/mongo"
    18  )
    19  
    20  // NewClient returns a new Client using the supplied config, or an error. Any
    21  // of the following situations will prevent client creation:
    22  //  * invalid config
    23  //  * invalid clock data stored in the namespace
    24  //  * invalid lease data stored in the namespace
    25  // ...but a returned Client will hold a recent cache of lease data and be ready
    26  // to use.
    27  // Clients do not need to be cleaned up themselves, but they will not function
    28  // past the lifetime of their configured Mongo.
    29  func NewClient(config ClientConfig) (Client, error) {
    30  	if err := config.Validate(); err != nil {
    31  		return nil, errors.Trace(err)
    32  	}
    33  	loggerName := fmt.Sprintf("state.lease.%s.%s", config.Namespace, config.Id)
    34  	logger := loggo.GetLogger(loggerName)
    35  	client := &client{
    36  		config: config,
    37  		logger: logger,
    38  	}
    39  	if err := client.ensureClockDoc(); err != nil {
    40  		return nil, errors.Trace(err)
    41  	}
    42  	if err := client.Refresh(); err != nil {
    43  		return nil, errors.Trace(err)
    44  	}
    45  	return client, nil
    46  }
    47  
    48  // client implements the Client interface.
    49  type client struct {
    50  
    51  	// config holds resources and configuration necessary to store leases.
    52  	config ClientConfig
    53  
    54  	// logger holds a logger unique to this lease Client.
    55  	logger loggo.Logger
    56  
    57  	// entries records recent information about leases.
    58  	entries map[string]entry
    59  
    60  	// skews records recent information about remote writers' clocks.
    61  	skews map[string]Skew
    62  }
    63  
    64  // Leases is part of the Client interface.
    65  func (client *client) Leases() map[string]Info {
    66  	leases := make(map[string]Info)
    67  	for name, entry := range client.entries {
    68  		skew := client.skews[entry.writer]
    69  		leases[name] = Info{
    70  			Holder:   entry.holder,
    71  			Expiry:   skew.Latest(entry.expiry),
    72  			AssertOp: client.assertOp(name, entry.holder),
    73  		}
    74  	}
    75  	return leases
    76  }
    77  
    78  // ClaimLease is part of the Client interface.
    79  func (client *client) ClaimLease(name string, request Request) error {
    80  	return client.request(name, request, client.claimLeaseOps, "claiming")
    81  }
    82  
    83  // ExtendLease is part of the Client interface.
    84  func (client *client) ExtendLease(name string, request Request) error {
    85  	return client.request(name, request, client.extendLeaseOps, "extending")
    86  }
    87  
    88  // opsFunc is used to make the signature of the request method somewhat readable.
    89  type opsFunc func(name string, request Request) ([]txn.Op, entry, error)
    90  
    91  // request implements ClaimLease and ExtendLease.
    92  func (client *client) request(name string, request Request, getOps opsFunc, verb string) error {
    93  	if err := validateString(name); err != nil {
    94  		return errors.Annotatef(err, "invalid name")
    95  	}
    96  	if err := request.Validate(); err != nil {
    97  		return errors.Annotatef(err, "invalid request")
    98  	}
    99  
   100  	// Close over cacheEntry to record in case of success.
   101  	var cacheEntry entry
   102  	err := client.config.Mongo.RunTransaction(func(attempt int) ([]txn.Op, error) {
   103  		client.logger.Tracef("%s lease %q for %s (attempt %d)", verb, name, request, attempt)
   104  
   105  		// On the first attempt, assume cache is good.
   106  		if attempt > 0 {
   107  			if err := client.Refresh(); err != nil {
   108  				return nil, errors.Trace(err)
   109  			}
   110  		}
   111  
   112  		// It's possible that the request is for an "extension" isn't an
   113  		// extension at all; this isn't a problem, but does require separate
   114  		// handling.
   115  		ops, nextEntry, err := getOps(name, request)
   116  		cacheEntry = nextEntry
   117  		if errors.Cause(err) == errNoExtension {
   118  			return nil, jujutxn.ErrNoOperations
   119  		}
   120  		if err != nil {
   121  			return nil, errors.Trace(err)
   122  		}
   123  		return ops, nil
   124  	})
   125  
   126  	// Unwrap ErrInvalid if necessary.
   127  	if errors.Cause(err) == ErrInvalid {
   128  		return ErrInvalid
   129  	}
   130  	if err != nil {
   131  		return errors.Trace(err)
   132  	}
   133  
   134  	// Update the cache for this lease only.
   135  	client.entries[name] = cacheEntry
   136  	return nil
   137  }
   138  
   139  // ExpireLease is part of the Client interface.
   140  func (client *client) ExpireLease(name string) error {
   141  	if err := validateString(name); err != nil {
   142  		return errors.Annotatef(err, "invalid name")
   143  	}
   144  
   145  	// No cache updates needed, only deletes; no closure here.
   146  	err := client.config.Mongo.RunTransaction(func(attempt int) ([]txn.Op, error) {
   147  		client.logger.Tracef("expiring lease %q (attempt %d)", name, attempt)
   148  
   149  		// On the first attempt, assume cache is good.
   150  		if attempt > 0 {
   151  			if err := client.Refresh(); err != nil {
   152  				return nil, errors.Trace(err)
   153  			}
   154  		}
   155  
   156  		// No special error handling here.
   157  		ops, err := client.expireLeaseOps(name)
   158  		if err != nil {
   159  			return nil, errors.Trace(err)
   160  		}
   161  		return ops, nil
   162  	})
   163  
   164  	// Unwrap ErrInvalid if necessary.
   165  	if errors.Cause(err) == ErrInvalid {
   166  		return ErrInvalid
   167  	}
   168  	if err != nil {
   169  		return errors.Trace(err)
   170  	}
   171  
   172  	// Uncache this lease entry.
   173  	delete(client.entries, name)
   174  	return nil
   175  }
   176  
   177  // Refresh is part of the Client interface.
   178  func (client *client) Refresh() error {
   179  	client.logger.Tracef("refreshing")
   180  
   181  	// Always read entries before skews, because skews are written before
   182  	// entries; we increase the risk of reading older skew data, but (should)
   183  	// eliminate the risk of reading an entry whose writer is not present
   184  	// in the skews data.
   185  	collection, closer := client.config.Mongo.GetCollection(client.config.Collection)
   186  	defer closer()
   187  	entries, err := client.readEntries(collection)
   188  	if err != nil {
   189  		return errors.Trace(err)
   190  	}
   191  	skews, err := client.readSkews(collection)
   192  	if err != nil {
   193  		return errors.Trace(err)
   194  	}
   195  
   196  	// Check we're not missing any required clock information before
   197  	// updating our local state.
   198  	for name, entry := range entries {
   199  		if _, found := skews[entry.writer]; !found {
   200  			return errors.Errorf("lease %q invalid: no clock data for %s", name, entry.writer)
   201  		}
   202  	}
   203  	client.skews = skews
   204  	client.entries = entries
   205  	return nil
   206  }
   207  
   208  // ensureClockDoc returns an error if it can neither find nor create a
   209  // valid clock document for the client's namespace.
   210  func (client *client) ensureClockDoc() error {
   211  	collection, closer := client.config.Mongo.GetCollection(client.config.Collection)
   212  	defer closer()
   213  
   214  	clockDocId := client.clockDocId()
   215  	err := client.config.Mongo.RunTransaction(func(attempt int) ([]txn.Op, error) {
   216  		client.logger.Tracef("checking clock %q (attempt %d)", clockDocId, attempt)
   217  		var clockDoc clockDoc
   218  		err := collection.FindId(clockDocId).One(&clockDoc)
   219  		if err == nil {
   220  			client.logger.Tracef("clock already exists")
   221  			if err := clockDoc.validate(); err != nil {
   222  				return nil, errors.Annotatef(err, "corrupt clock document")
   223  			}
   224  			return nil, jujutxn.ErrNoOperations
   225  		}
   226  		if err != mgo.ErrNotFound {
   227  			return nil, errors.Trace(err)
   228  		}
   229  		client.logger.Tracef("creating clock")
   230  		newClockDoc, err := newClockDoc(client.config.Namespace)
   231  		if err != nil {
   232  			return nil, errors.Trace(err)
   233  		}
   234  		return []txn.Op{{
   235  			C:      client.config.Collection,
   236  			Id:     clockDocId,
   237  			Assert: txn.DocMissing,
   238  			Insert: newClockDoc,
   239  		}}, nil
   240  	})
   241  	return errors.Trace(err)
   242  }
   243  
   244  // readEntries reads all lease data for the client's namespace.
   245  func (client *client) readEntries(collection mongo.Collection) (map[string]entry, error) {
   246  
   247  	// Read all lease documents in the client's namespace.
   248  	query := bson.M{
   249  		fieldType:      typeLease,
   250  		fieldNamespace: client.config.Namespace,
   251  	}
   252  	iter := collection.Find(query).Iter()
   253  
   254  	// Extract valid entries for each one.
   255  	entries := make(map[string]entry)
   256  	var leaseDoc leaseDoc
   257  	for iter.Next(&leaseDoc) {
   258  		name, entry, err := leaseDoc.entry()
   259  		if err != nil {
   260  			return nil, errors.Annotatef(err, "corrupt lease document %q", leaseDoc.Id)
   261  		}
   262  		entries[name] = entry
   263  	}
   264  	if err := iter.Close(); err != nil {
   265  		return nil, errors.Trace(err)
   266  	}
   267  	return entries, nil
   268  }
   269  
   270  // readSkews reads all clock data for the client's namespace.
   271  func (client *client) readSkews(collection mongo.Collection) (map[string]Skew, error) {
   272  
   273  	// Read the clock document, recording the time before and after completion.
   274  	readBefore := client.config.Clock.Now()
   275  	var clockDoc clockDoc
   276  	if err := collection.FindId(client.clockDocId()).One(&clockDoc); err != nil {
   277  		return nil, errors.Trace(err)
   278  	}
   279  	readAfter := client.config.Clock.Now()
   280  	if err := clockDoc.validate(); err != nil {
   281  		return nil, errors.Annotatef(err, "corrupt clock document")
   282  	}
   283  
   284  	// Create skew entries for each known writer...
   285  	skews, err := clockDoc.skews(readBefore, readAfter)
   286  	if err != nil {
   287  		return nil, errors.Trace(err)
   288  	}
   289  
   290  	// If a writer was previously known to us, and has not written since last
   291  	// time we read, we should keep the original skew, which is more accurate.
   292  	for writer, skew := range client.skews {
   293  		if skews[writer].LastWrite == skew.LastWrite {
   294  			skews[writer] = skew
   295  		}
   296  	}
   297  
   298  	// ...and overwrite our own with a zero skew, which will DTRT (assuming
   299  	// nobody's reusing client ids across machines with different clocks,
   300  	// which *should* never happen).
   301  	skews[client.config.Id] = Skew{}
   302  	return skews, nil
   303  }
   304  
   305  // claimLeaseOps returns the []txn.Op necessary to claim the supplied lease
   306  // until duration in the future, and a cache entry corresponding to the values
   307  // that will be written if the transaction succeeds. If the claim would conflict
   308  // with cached state, it returns ErrInvalid.
   309  func (client *client) claimLeaseOps(name string, request Request) ([]txn.Op, entry, error) {
   310  
   311  	// We can't claim a lease that's already held.
   312  	if _, found := client.entries[name]; found {
   313  		return nil, entry{}, ErrInvalid
   314  	}
   315  
   316  	// According to the local clock, we want the lease to extend until
   317  	// <duration> in the future.
   318  	now := client.config.Clock.Now()
   319  	expiry := now.Add(request.Duration)
   320  	nextEntry := entry{
   321  		holder: request.Holder,
   322  		expiry: expiry,
   323  		writer: client.config.Id,
   324  	}
   325  
   326  	// We need to write the entry to the database in a specific format.
   327  	leaseDoc, err := newLeaseDoc(client.config.Namespace, name, nextEntry)
   328  	if err != nil {
   329  		return nil, entry{}, errors.Trace(err)
   330  	}
   331  	extendLeaseOp := txn.Op{
   332  		C:      client.config.Collection,
   333  		Id:     leaseDoc.Id,
   334  		Assert: txn.DocMissing,
   335  		Insert: leaseDoc,
   336  	}
   337  
   338  	// We always write a clock-update operation *before* writing lease info.
   339  	writeClockOp := client.writeClockOp(now)
   340  	ops := []txn.Op{writeClockOp, extendLeaseOp}
   341  	return ops, nextEntry, nil
   342  }
   343  
   344  // extendLeaseOps returns the []txn.Op necessary to extend the supplied lease
   345  // until duration in the future, and a cache entry corresponding to the values
   346  // that will be written if the transaction succeeds. If the supplied lease
   347  // already extends far enough that no operations are required, it will return
   348  // errNoExtension. If the extension would conflict with cached state, it will
   349  // return ErrInvalid.
   350  func (client *client) extendLeaseOps(name string, request Request) ([]txn.Op, entry, error) {
   351  
   352  	// Reject extensions when there's no lease, or the holder doesn't match.
   353  	lastEntry, found := client.entries[name]
   354  	if !found {
   355  		return nil, entry{}, ErrInvalid
   356  	}
   357  	if lastEntry.holder != request.Holder {
   358  		return nil, entry{}, ErrInvalid
   359  	}
   360  
   361  	// According to the local clock, we want the lease to extend until
   362  	// <duration> in the future.
   363  	now := client.config.Clock.Now()
   364  	expiry := now.Add(request.Duration)
   365  
   366  	// We don't know what time the original writer thinks it is, but we
   367  	// can figure out the earliest and latest local times at which it
   368  	// could be expecting its original lease to expire.
   369  	skew := client.skews[lastEntry.writer]
   370  	if expiry.Before(skew.Earliest(lastEntry.expiry)) {
   371  		// The "extended" lease will certainly expire before the
   372  		// existing lease could. Done.
   373  		return nil, lastEntry, errNoExtension
   374  	}
   375  	latestExpiry := skew.Latest(lastEntry.expiry)
   376  	if expiry.Before(latestExpiry) {
   377  		// The lease might be long enough, but we're not sure, so we'll
   378  		// write a new one that definitely is long enough; but we must
   379  		// be sure that the new lease has an expiry time such that no
   380  		// other writer can consider it to have expired before the
   381  		// original writer considers its own lease to have expired.
   382  		expiry = latestExpiry
   383  	}
   384  
   385  	// We know we need to write a lease; we know when it needs to expire; we
   386  	// know what needs to go into the local cache:
   387  	nextEntry := entry{
   388  		holder: lastEntry.holder,
   389  		expiry: expiry,
   390  		writer: client.config.Id,
   391  	}
   392  
   393  	// ...and what needs to change in the database, and how to ensure the
   394  	// change is still valid when it's executed.
   395  	extendLeaseOp := txn.Op{
   396  		C:  client.config.Collection,
   397  		Id: client.leaseDocId(name),
   398  		Assert: bson.M{
   399  			fieldLeaseHolder: lastEntry.holder,
   400  			fieldLeaseExpiry: toInt64(lastEntry.expiry),
   401  			fieldLeaseWriter: lastEntry.writer,
   402  		},
   403  		Update: bson.M{"$set": bson.M{
   404  			fieldLeaseExpiry: toInt64(expiry),
   405  			fieldLeaseWriter: client.config.Id,
   406  		}},
   407  	}
   408  
   409  	// We always write a clock-update operation *before* writing lease info.
   410  	writeClockOp := client.writeClockOp(now)
   411  	ops := []txn.Op{writeClockOp, extendLeaseOp}
   412  	return ops, nextEntry, nil
   413  }
   414  
   415  // expireLeaseOps returns the []txn.Op necessary to vacate the lease. If the
   416  // expiration would conflict with cached state, it will return ErrInvalid.
   417  func (client *client) expireLeaseOps(name string) ([]txn.Op, error) {
   418  
   419  	// We can't expire a lease that doesn't exist.
   420  	lastEntry, found := client.entries[name]
   421  	if !found {
   422  		return nil, ErrInvalid
   423  	}
   424  
   425  	// We also can't expire a lease whose expiry time may be in the future.
   426  	skew := client.skews[lastEntry.writer]
   427  	latestExpiry := skew.Latest(lastEntry.expiry)
   428  	now := client.config.Clock.Now()
   429  	if !now.After(latestExpiry) {
   430  		client.logger.Tracef("lease %q expires in the future", name)
   431  		return nil, ErrInvalid
   432  	}
   433  
   434  	// The database change is simple, and depends on the lease doc being
   435  	// untouched since we looked:
   436  	expireLeaseOp := txn.Op{
   437  		C:  client.config.Collection,
   438  		Id: client.leaseDocId(name),
   439  		Assert: bson.M{
   440  			fieldLeaseHolder: lastEntry.holder,
   441  			fieldLeaseExpiry: toInt64(lastEntry.expiry),
   442  			fieldLeaseWriter: lastEntry.writer,
   443  		},
   444  		Remove: true,
   445  	}
   446  
   447  	// We always write a clock-update operation *before* writing lease info.
   448  	// Removing a lease document counts as writing lease info.
   449  	writeClockOp := client.writeClockOp(now)
   450  	ops := []txn.Op{writeClockOp, expireLeaseOp}
   451  	return ops, nil
   452  }
   453  
   454  // writeClockOp returns a txn.Op which writes the supplied time to the writer's
   455  // field in the skew doc, and aborts if a more recent time has been recorded for
   456  // that writer.
   457  func (client *client) writeClockOp(now time.Time) txn.Op {
   458  	dbNow := toInt64(now)
   459  	dbKey := fmt.Sprintf("%s.%s", fieldClockWriters, client.config.Id)
   460  	return txn.Op{
   461  		C:  client.config.Collection,
   462  		Id: client.clockDocId(),
   463  		Assert: bson.M{
   464  			"$or": []bson.M{{
   465  				dbKey: bson.M{"$lte": dbNow},
   466  			}, {
   467  				dbKey: bson.M{"$exists": false},
   468  			}},
   469  		},
   470  		Update: bson.M{
   471  			"$set": bson.M{dbKey: dbNow},
   472  		},
   473  	}
   474  }
   475  
   476  // assertOp returns a txn.Op which will succeed only if holder holds the
   477  // named lease.
   478  func (client *client) assertOp(name, holder string) txn.Op {
   479  	return txn.Op{
   480  		C:  client.config.Collection,
   481  		Id: client.leaseDocId(name),
   482  		Assert: bson.M{
   483  			fieldLeaseHolder: holder,
   484  		},
   485  	}
   486  }
   487  
   488  // clockDocId returns the id of the clock document in the client's namespace.
   489  func (client *client) clockDocId() string {
   490  	return clockDocId(client.config.Namespace)
   491  }
   492  
   493  // leaseDocId returns the id of the named lease document in the client's
   494  // namespace.
   495  func (client *client) leaseDocId(name string) string {
   496  	return leaseDocId(client.config.Namespace, name)
   497  }
   498  
   499  // entry holds the details of a lease and how it was written.
   500  type entry struct {
   501  	// holder identifies the current holder of the lease.
   502  	holder string
   503  
   504  	// expiry is the (writer-local) time at which the lease is safe to remove.
   505  	expiry time.Time
   506  
   507  	// writer identifies the client that wrote the lease.
   508  	writer string
   509  }
   510  
   511  // errNoExtension is used internally to avoid running unnecessary transactions.
   512  var errNoExtension = errors.New("lease needs no extension")