github.com/mhilton/juju-juju@v0.0.0-20150901100907-a94dd2c73455/state/presence/presence.go

// Copyright 2012, 2013 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

// Package presence implements an interface for observing the liveness
// of arbitrary keys (agents, processes, etc) on top of MongoDB.
// The design works by periodically updating the database so that
// watchers can tell whether an arbitrary key is alive.
package presence

import (
	"fmt"
	"strconv"
	"sync"
	"time"

	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/names"
	"gopkg.in/mgo.v2"
	"gopkg.in/mgo.v2/bson"
	"launchpad.net/tomb"
)

var logger = loggo.GetLogger("juju.state.presence")

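// Presencer is implemented by state entities that expose
// agent presence on top of this package.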
type Presencer interface {
	AgentPresence() (bool, error)
	SetAgentPresence() (*Pinger, error)
	WaitAgentPresence(time.Duration) error
}

// docIDInt64 generates a globally unique id value
// where the environment uuid is prefixed to the
// given int64 localID.
func docIDInt64(envUUID string, localID int64) string {
	return envUUID + ":" + strconv.FormatInt(localID, 10)
}

// docIDStr generates a globally unique id value
// where the environment uuid is prefixed to the
// given string localID.
func docIDStr(envUUID string, localID string) string {
	return envUUID + ":" + localID
}

// The implementation works by assigning a unique sequence number to each
// pinger that is alive, and the pinger is then responsible for
// periodically updating the current time slot document with its
// sequence number so that watchers can tell it is alive.
//
// There is only one time slot document per time slot, per environment. The
// internal implementation of the time slot document is as follows:
//
// {
//   "_id":   <environ UUID>:<time slot>,
//   "slot": <slot>,
//   "env-uuid": <environ UUID>,
//   "alive": { hex(<pinger seq> / 63) : (1 << (<pinger seq> % 63) | <others>) },
//   "dead":  { hex(<pinger seq> / 63) : (1 << (<pinger seq> % 63) | <others>) },
// }
//
// All pingers that have their sequence number under "alive" and not
// under "dead" are currently alive. This design enables implementing
// a ping with a single update operation, a kill with another operation,
// and obtaining liveness data with a single query that returns two
// documents (the last two time slots).
//
// A new pinger sequence is obtained every time a pinger starts by atomically
// incrementing a counter in a document in a helper collection. There is only
// one such document per environment. That sequence number is then inserted
// into the beings collection to establish the mapping between pinger sequence
// and key.

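// As a worked example of the packing above (illustrative only, not
// used by the implementation): a pinger with seq=130 lands in field
// hex(130/63) = "2" with bit 1<<(130%63) = 16, so a ping issues
// {$inc: {"alive.2": 16}} and liveness is read back by testing
// alive["2"]&16 != 0. The helper below sketches that mapping; it
// mirrors the computation done in Pinger.prepare.
func examplePackSeq(seq int64) (fieldKey string, fieldBit uint64) {
	fieldKey = fmt.Sprintf("%x", seq/63)
	fieldBit = 1 << uint64(seq%63)
	return fieldKey, fieldBit
}
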
// BUG(gn): The pings and beings collections currently grow without bound.

// A Watcher can watch any number of pinger keys for liveness changes.
type Watcher struct {
	envUUID string
	tomb    tomb.Tomb
	base    *mgo.Collection
	pings   *mgo.Collection
	beings  *mgo.Collection

	// delta is the approximate clock skew between the local system
	// clock and the database clock.
	delta time.Duration

	// beingKey and beingSeq are the pinger seq <=> key mappings.
	// Entries in these maps are considered alive.
	beingKey map[int64]string
	beingSeq map[string]int64

	// watches has the per-key observer channels from Watch/Unwatch.
	watches map[string][]chan<- Change

	// pending contains all the events to be dispatched to the watcher
	// channels. They're queued during processing and flushed at the
	// end to simplify the algorithm.
	pending []event

	// request is used to deliver requests from the public API into
	// the goroutine loop.
	request chan interface{}

	// syncDone contains pending done channels from sync requests.
	syncDone []chan bool

	// next fires when it's time to sync the database knowledge.
	// It's maintained here so that StartSync and Sync can
	// manipulate it to force a sync sooner.
	next <-chan time.Time
}

type event struct {
	ch    chan<- Change
	key   string
	alive bool
}

// Change holds a liveness change notification.
type Change struct {
	Key   string
	Alive bool
}

// NewWatcher returns a new Watcher.
func NewWatcher(base *mgo.Collection, envTag names.EnvironTag) *Watcher {
	w := &Watcher{
		envUUID:  envTag.Id(),
		base:     base,
		pings:    pingsC(base),
		beings:   beingsC(base),
		beingKey: make(map[int64]string),
		beingSeq: make(map[string]int64),
		watches:  make(map[string][]chan<- Change),
		request:  make(chan interface{}),
	}
	go func() {
		err := w.loop()
		cause := errors.Cause(err)
		// tomb expects ErrDying or ErrStillAlive as
		// exact values, so we need to log and unwrap
		// the error first.
		if err != nil && cause != tomb.ErrDying {
			logger.Infof("watcher loop failed: %v", err)
		}
		w.tomb.Kill(cause)
		w.tomb.Done()
	}()
	return w
}

// Stop stops all the watcher activities.
func (w *Watcher) Stop() error {
	w.tomb.Kill(nil)
	return errors.Trace(w.tomb.Wait())
}

// Dead returns a channel that is closed when the watcher has stopped.
func (w *Watcher) Dead() <-chan struct{} {
	return w.tomb.Dead()
}

// Err returns the error with which the watcher stopped.
// It returns nil if the watcher stopped cleanly, tomb.ErrStillAlive
// if the watcher is still running properly, or the respective error
// if the watcher is terminating or has terminated with an error.
func (w *Watcher) Err() error {
	return w.tomb.Err()
}

type reqWatch struct {
	key string
	ch  chan<- Change
}

type reqUnwatch struct {
	key string
	ch  chan<- Change
}

type reqSync struct {
	done chan bool
}

type reqAlive struct {
	key    string
	result chan bool
}

func (w *Watcher) sendReq(req interface{}) {
	select {
	case w.request <- req:
	case <-w.tomb.Dying():
	}
}

// Watch starts watching the liveness of key. An event will
// be sent onto ch to report the initial status for the key, and
// from then on a new event will be sent whenever a change is
// detected. Change values sent to the channel must be consumed,
// or the whole watcher will block.
func (w *Watcher) Watch(key string, ch chan<- Change) {
	w.sendReq(reqWatch{key, ch})
}

// Unwatch stops watching the liveness of key via ch.
func (w *Watcher) Unwatch(key string, ch chan<- Change) {
	w.sendReq(reqUnwatch{key, ch})
}
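
// watchExample is an illustrative sketch of caller code (assumed, not
// part of this package's API): it watches a key and drains the change
// channel promptly, since an unconsumed Change would block the shared
// watcher loop for every observer.
func watchExample(w *Watcher, key string) {
	ch := make(chan Change)
	w.Watch(key, ch)
	go func() {
		for change := range ch {
			logger.Debugf("presence change: key=%q alive=%v", change.Key, change.Alive)
		}
	}()
}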

// StartSync forces the watcher to load new events from the database.
func (w *Watcher) StartSync() {
	w.sendReq(reqSync{nil})
}

// Sync forces the watcher to load new events from the database and blocks
// until all events have been dispatched.
func (w *Watcher) Sync() {
	done := make(chan bool)
	w.sendReq(reqSync{done})
	select {
	case <-done:
	case <-w.tomb.Dying():
	}
}

// Alive returns whether the key is currently considered alive by w,
// or an error in case the watcher is dying.
func (w *Watcher) Alive(key string) (bool, error) {
	result := make(chan bool, 1)
	w.sendReq(reqAlive{key, result})
	var alive bool
	select {
	case alive = <-result:
	case <-w.tomb.Dying():
		return false, errors.Errorf("cannot check liveness: watcher is dying")
	}
	return alive, nil
}

// period is the length of each time slot in seconds.
// It's not a time.Duration because the code is more convenient like
// this, and because sub-second timings don't work anyway: the slot
// identifier is an int64 number of seconds.
var period int64 = 30
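
// For example (illustrative values only): with period = 30, every Unix
// time in [1735689600, 1735689630) maps to slot 1735689600, so all
// pings within the same 30-second window update the same slot document.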

// loop implements the main watcher loop.
func (w *Watcher) loop() error {
	var err error
	if w.delta, err = clockDelta(w.base); err != nil {
		return errors.Trace(err)
	}
	w.next = time.After(0)
	for {
		select {
		case <-w.tomb.Dying():
			return errors.Trace(tomb.ErrDying)
		case <-w.next:
			w.next = time.After(time.Duration(period) * time.Second)
			syncDone := w.syncDone
			w.syncDone = nil
			if err := w.sync(); err != nil {
				return errors.Trace(err)
			}
			w.flush()
			for _, done := range syncDone {
				close(done)
			}
		case req := <-w.request:
			w.handle(req)
			w.flush()
		}
	}
}

// flush sends all pending events to their respective channels.
func (w *Watcher) flush() {
	// w.pending may get new requests as we handle other requests.
	for i := 0; i < len(w.pending); i++ {
		e := &w.pending[i]
		for e.ch != nil {
			select {
			case <-w.tomb.Dying():
				return
			case req := <-w.request:
				w.handle(req)
				continue
			case e.ch <- Change{e.key, e.alive}:
			}
			break
		}
	}
	w.pending = w.pending[:0]
}

// handle deals with requests delivered by the public API
// onto the background watcher goroutine.
func (w *Watcher) handle(req interface{}) {
	logger.Tracef("got request: %#v", req)
	switch r := req.(type) {
	case reqSync:
		w.next = time.After(0)
		if r.done != nil {
			w.syncDone = append(w.syncDone, r.done)
		}
	case reqWatch:
		for _, ch := range w.watches[r.key] {
			if ch == r.ch {
				panic("adding channel twice for same key")
			}
		}
		w.watches[r.key] = append(w.watches[r.key], r.ch)
		_, alive := w.beingSeq[r.key]
		w.pending = append(w.pending, event{r.ch, r.key, alive})
	case reqUnwatch:
		watches := w.watches[r.key]
		for i, ch := range watches {
			if ch == r.ch {
				watches[i] = watches[len(watches)-1]
				w.watches[r.key] = watches[:len(watches)-1]
				break
			}
		}
		for i := range w.pending {
			e := &w.pending[i]
			if e.key == r.key && e.ch == r.ch {
				e.ch = nil
			}
		}
	case reqAlive:
		_, alive := w.beingSeq[r.key]
		r.result <- alive
	default:
		panic(fmt.Errorf("unknown request: %T", req))
	}
}

type beingInfo struct {
	DocID   string `bson:"_id,omitempty"`
	Seq     int64  `bson:"seq,omitempty"`
	EnvUUID string `bson:"env-uuid,omitempty"`
	Key     string `bson:"key,omitempty"`
}

type pingInfo struct {
	DocID string           `bson:"_id,omitempty"`
	Slot  int64            `bson:"slot,omitempty"`
	Alive map[string]int64 `bson:",omitempty"`
	Dead  map[string]int64 `bson:",omitempty"`
}

func (w *Watcher) findAllBeings() (map[int64]beingInfo, error) {
	beings := make([]beingInfo, 0)
	session := w.beings.Database.Session.Copy()
	defer session.Close()
	beingsC := w.beings.With(session)

	err := beingsC.Find(bson.D{{"env-uuid", w.envUUID}}).All(&beings)
	if err != nil {
		return nil, err
	}
	beingInfos := make(map[int64]beingInfo, len(beings))
	for _, being := range beings {
		beingInfos[being.Seq] = being
	}
	return beingInfos, nil
}

// sync updates the watcher knowledge from the database, and
// queues events to observing channels. It fetches the last two time
// slots and compares the union of both to the in-memory state.
func (w *Watcher) sync() error {
	var allBeings map[int64]beingInfo
	if len(w.beingKey) == 0 {
		// The very first time we sync, we grab all ever-known beings,
		// so we don't have to look them up one-by-one.
		var err error
		if allBeings, err = w.findAllBeings(); err != nil {
			return errors.Trace(err)
		}
	}
	s := timeSlot(time.Now(), w.delta)
	slot := docIDInt64(w.envUUID, s)
	previousSlot := docIDInt64(w.envUUID, s-period)
	session := w.pings.Database.Session.Copy()
	defer session.Close()
	pings := w.pings.With(session)
	var ping []pingInfo
	q := bson.D{{"$or", []pingInfo{{DocID: slot}, {DocID: previousSlot}}}}
	err := pings.Find(q).All(&ping)
	if err != nil && err != mgo.ErrNotFound {
		return errors.Trace(err)
	}

	// Learn about all enforced deaths.
	dead := make(map[int64]bool)
	for i := range ping {
		for key, value := range ping[i].Dead {
			k, err := strconv.ParseInt(key, 16, 64)
			if err != nil {
				err = errors.Annotatef(err, "presence cannot parse dead key: %q", key)
				panic(err)
			}
			k *= 63
			for i := int64(0); i < 63 && value > 0; i++ {
				on := value&1 == 1
				value >>= 1
				if !on {
					continue
				}
				seq := k + i
				dead[seq] = true
				logger.Tracef("found seq=%d dead", seq)
			}
		}
	}

	// Learn about all the pingers that reported and queue
	// events for those that weren't known to be alive and
	// are not reportedly dead either.
	beingsC := w.beings.With(session)
	alive := make(map[int64]bool)
	being := beingInfo{}
	for i := range ping {
		for key, value := range ping[i].Alive {
			k, err := strconv.ParseInt(key, 16, 64)
			if err != nil {
				err = errors.Annotatef(err, "presence cannot parse alive key: %q", key)
				panic(err)
			}
			k *= 63
			for i := int64(0); i < 63 && value > 0; i++ {
				on := value&1 == 1
				value >>= 1
				if !on {
					continue
				}
				seq := k + i
				alive[seq] = true
				if _, ok := w.beingKey[seq]; ok {
					continue
				}
				// Check if the being exists in the 'all' map,
				// otherwise do a single lookup in mongo.
				var ok bool
				if being, ok = allBeings[seq]; !ok {
					err := beingsC.Find(bson.D{{"_id", docIDInt64(w.envUUID, seq)}}).One(&being)
					if err == mgo.ErrNotFound {
						logger.Tracef("found seq=%d unowned", seq)
						continue
					} else if err != nil {
						return errors.Trace(err)
					}
				}
				cur := w.beingSeq[being.Key]
				if cur < seq {
					delete(w.beingKey, cur)
				} else {
					// Current sequence is more recent.
					continue
				}
				w.beingKey[seq] = being.Key
				w.beingSeq[being.Key] = seq
				if cur > 0 || dead[seq] {
					continue
				}
				logger.Tracef("found seq=%d alive with key %q", seq, being.Key)
				for _, ch := range w.watches[being.Key] {
					w.pending = append(w.pending, event{ch, being.Key, true})
				}
			}
		}
	}

	// Pingers that were known to be alive and haven't reported
	// in the last two slots are now considered dead. Dispatch
	// the respective events and forget their sequences.
	for seq, key := range w.beingKey {
		if dead[seq] || !alive[seq] {
			delete(w.beingKey, seq)
			delete(w.beingSeq, key)
			for _, ch := range w.watches[key] {
				w.pending = append(w.pending, event{ch, key, false})
			}
		}
	}
	return nil
}

// Pinger periodically reports that a specific key is alive, so that
// watchers interested in that fact can react appropriately.
type Pinger struct {
	envUUID  string
	mu       sync.Mutex
	tomb     tomb.Tomb
	base     *mgo.Collection
	pings    *mgo.Collection
	started  bool
	beingKey string
	beingSeq int64
	fieldKey string // hex(beingSeq / 63)
	fieldBit uint64 // 1 << (beingSeq % 63)
	lastSlot int64
	delta    time.Duration
}

// NewPinger returns a new Pinger to report that key is alive.
// It starts reporting after Start is called.
func NewPinger(base *mgo.Collection, envTag names.EnvironTag, key string) *Pinger {
	return &Pinger{
		base:     base,
		pings:    pingsC(base),
		beingKey: key,
		envUUID:  envTag.Id(),
	}
}

// Start starts periodically reporting that p's key is alive.
func (p *Pinger) Start() error {
	p.mu.Lock()
	defer p.mu.Unlock()
	if p.started {
		return errors.Errorf("pinger already started")
	}
	p.tomb = tomb.Tomb{}
	if err := p.prepare(); err != nil {
		return errors.Trace(err)
	}
	logger.Tracef("starting pinger for %q with seq=%d", p.beingKey, p.beingSeq)
	if err := p.ping(); err != nil {
		return errors.Trace(err)
	}
	p.started = true
	go func() {
		err := p.loop()
		cause := errors.Cause(err)
		// tomb expects ErrDying or ErrStillAlive as
		// exact values, so we need to log and unwrap
		// the error first.
		if err != nil && cause != tomb.ErrDying {
			logger.Infof("pinger loop failed: %v", err)
		}
		p.tomb.Kill(cause)
		p.tomb.Done()
	}()
	return nil
}
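
// pingerExample is an illustrative sketch of caller code (assumed, not
// part of this package's API): it reports liveness for a key while some
// work runs, then stops cleanly. Watchers only see the key die after
// the last pinged slot times out.
func pingerExample(base *mgo.Collection, envTag names.EnvironTag, work func()) error {
	p := NewPinger(base, envTag, "some-key")
	if err := p.Start(); err != nil {
		return errors.Trace(err)
	}
	work()
	return errors.Trace(p.Stop())
}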

// Stop stops p's periodic ping.
// Watchers will not notice p has stopped pinging until the
// previous ping times out.
func (p *Pinger) Stop() error {
	p.mu.Lock()
	defer p.mu.Unlock()
	if p.started {
		logger.Tracef("stopping pinger for %q with seq=%d", p.beingKey, p.beingSeq)
	}
	p.tomb.Kill(nil)
	err := p.tomb.Wait()
	// TODO ping one more time to guarantee a late timeout.
	p.started = false
	return errors.Trace(err)
}

// Kill stops p's periodic ping and immediately reports that it is dead.
func (p *Pinger) Kill() error {
	p.mu.Lock()
	defer p.mu.Unlock()
	if p.started {
		logger.Tracef("killing pinger for %q (was started)", p.beingKey)
		return p.killStarted()
	}
	logger.Tracef("killing pinger for %q (was stopped)", p.beingKey)
	return p.killStopped()
}

// killStarted kills the pinger while it is running, by first
// stopping it and then recording in the last pinged slot that
// the pinger was killed.
func (p *Pinger) killStarted() error {
	p.tomb.Kill(nil)
	killErr := p.tomb.Wait()
	p.started = false

	slot := p.lastSlot
	udoc := bson.D{
		{"$set", bson.D{{"slot", slot}}},
		{"$inc", bson.D{{"dead." + p.fieldKey, p.fieldBit}}}}
	session := p.pings.Database.Session.Copy()
	defer session.Close()
	pings := p.pings.With(session)
	if _, err := pings.UpsertId(docIDInt64(p.envUUID, slot), udoc); err != nil {
		return errors.Trace(err)
	}
	return errors.Trace(killErr)
}

// killStopped kills the pinger while it is not running, by
// first allocating a new sequence, and then atomically recording
// the new sequence as both alive and dead.
func (p *Pinger) killStopped() error {
	if err := p.prepare(); err != nil {
		return err
	}
	slot := timeSlot(time.Now(), p.delta)
	udoc := bson.D{
		{"$set", bson.D{{"slot", slot}}},
		{"$inc", bson.D{
			{"dead." + p.fieldKey, p.fieldBit},
			{"alive." + p.fieldKey, p.fieldBit},
		}}}
	session := p.pings.Database.Session.Copy()
	defer session.Close()
	pings := p.pings.With(session)
	_, err := pings.UpsertId(docIDInt64(p.envUUID, slot), udoc)
	return errors.Trace(err)
}

// loop is the main pinger loop that runs while the pinger
// is in the started state.
func (p *Pinger) loop() error {
	for {
		select {
		case <-p.tomb.Dying():
			return errors.Trace(tomb.ErrDying)
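		// Ping at roughly 3/4 of the slot period (about 23s for the
		// default 30s period) so that each slot the pinger lives
		// through is written at least once.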
		case <-time.After(time.Duration(float64(period+1)*0.75) * time.Second):
			if err := p.ping(); err != nil {
				return errors.Trace(err)
			}
		}
	}
}

// prepare allocates a new unique sequence for the
// pinger key and prepares the pinger to use it.
func (p *Pinger) prepare() error {
	change := mgo.Change{
		Update:    bson.D{{"$inc", bson.D{{"seq", int64(1)}}}},
		Upsert:    true,
		ReturnNew: true,
	}
	session := p.base.Database.Session.Copy()
	defer session.Close()
	base := p.base.With(session)
	seqs := seqsC(base)
	var seq struct{ Seq int64 }
	seqID := docIDStr(p.envUUID, "beings")
	if _, err := seqs.FindId(seqID).Apply(change, &seq); err != nil {
		return errors.Trace(err)
	}
	p.beingSeq = seq.Seq
	p.fieldKey = fmt.Sprintf("%x", p.beingSeq/63)
	p.fieldBit = 1 << uint64(p.beingSeq%63)
	p.lastSlot = 0
	beings := beingsC(base)
	return errors.Trace(beings.Insert(
		beingInfo{
			DocID:   docIDInt64(p.envUUID, p.beingSeq),
			Seq:     p.beingSeq,
			EnvUUID: p.envUUID,
			Key:     p.beingKey,
		},
	))
}

// ping updates the current time slot document with the
// sequence in use by the pinger.
func (p *Pinger) ping() (err error) {
	logger.Tracef("pinging %q with seq=%d", p.beingKey, p.beingSeq)
	defer func() {
		// If the session is killed from underneath us, it panics when we
		// try to copy it, so deal with that here.
		if v := recover(); v != nil {
			if v == "Session already closed" {
				return
			}
			err = fmt.Errorf("%v", v)
		}
	}()
	session := p.pings.Database.Session.Copy()
	defer session.Close()
	if p.delta == 0 {
		base := p.base.With(session)
		delta, err := clockDelta(base)
		if err != nil {
			return errors.Trace(err)
		}
		p.delta = delta
	}
	slot := timeSlot(time.Now(), p.delta)
	if slot == p.lastSlot {
		// Never, ever, ping the same slot twice.
		// The increment below would corrupt the slot.
		return nil
	}
	p.lastSlot = slot
	pings := p.pings.With(session)
	_, err = pings.UpsertId(
		docIDInt64(p.envUUID, slot),
		bson.D{
			{"$set", bson.D{{"slot", slot}}},
			{"$inc", bson.D{{"alive." + p.fieldKey, p.fieldBit}}},
		})
	return errors.Trace(err)
}

// clockDelta returns the approximate skew between
// the local clock and the database clock.
func clockDelta(c *mgo.Collection) (time.Duration, error) {
	var server struct {
		time.Time `bson:"retval"`
	}
	var isMaster struct {
		LocalTime time.Time `bson:"localTime"`
	}
	var after time.Time
	var before time.Time
	var serverDelay time.Duration
	supportsMasterLocalTime := true
	session := c.Database.Session.Copy()
	defer session.Close()
	db := c.Database.With(session)
	for i := 0; i < 10; i++ {
		if supportsMasterLocalTime {
			// Try isMaster.localTime, which is present since MongoDB 2.2
			// and does not require admin privileges.
			before = time.Now()
			err := db.Run("isMaster", &isMaster)
			after = time.Now()
			if err != nil {
				return 0, errors.Trace(err)
			}
			if isMaster.LocalTime.IsZero() {
				supportsMasterLocalTime = false
				continue
			} else {
				serverDelay = isMaster.LocalTime.Sub(before)
			}
		} else {
			// If MongoDB doesn't have localTime as part of the
			// isMaster result, the server is likely older than 2.2.
			//
			// Falling back to 'eval' works fine on versions older
			// than 2.4, where it does not require admin privileges.
			//
			// NOTE: 'eval' takes a global write lock unless you
			// specify 'nolock' (which we are not doing below, for
			// no apparent reason), so it is quite likely that the
			// eval could take a relatively long time to acquire
			// the lock and thus cause a retry on the callDelay
			// check below on a busy server.
			before = time.Now()
			err := db.Run(bson.D{{"$eval", "function() { return new Date(); }"}}, &server)
			after = time.Now()
			if err != nil {
				return 0, errors.Trace(err)
			}
			serverDelay = server.Sub(before)
		}
		// If the call to the server takes longer than a few seconds we
		// retry it a couple more times before giving up. It is unclear
		// why the retry would help at all here.
		//
		// If the server takes longer than the specified amount of time
		// on every single try, then we simply give up.
		callDelay := after.Sub(before)
		if callDelay > 5*time.Second {
			continue
		}
		return serverDelay, nil
	}
	return 0, errors.Errorf("cannot synchronize clock with database server")
}

// timeSlot returns the current time slot, in seconds since the
// epoch, for the provided now time. The delta skew is applied
// to the now time to improve the synchronization with a
// centrally agreed time.
//
// The result of this function may be manipulated for test purposes
// by fakeTimeSlot and realTimeSlot.
func timeSlot(now time.Time, delta time.Duration) int64 {
	fakeMutex.Lock()
	fake := !fakeNow.IsZero()
	if fake {
		now = fakeNow
	}
	slot := now.Add(delta).Unix()
	slot -= slot % period
	if fake {
		slot += int64(fakeOffset) * period
	}
	fakeMutex.Unlock()
	return slot
}

var (
	fakeMutex  sync.Mutex // protects fakeOffset, fakeNow
	fakeNow    time.Time
	fakeOffset int
)

// fakeTimeSlot hardcodes the slot time returned by the timeSlot
// function for testing purposes. The offset parameter is the slot
// position to return: offsets +1 and -1 are +period and -period
// seconds from slot 0, respectively.
func fakeTimeSlot(offset int) {
	fakeMutex.Lock()
	if fakeNow.IsZero() {
		fakeNow = time.Now()
	}
	fakeOffset = offset
	fakeMutex.Unlock()
	logger.Infof("faking presence to time slot %d", offset)
}

// realTimeSlot disables the hardcoding introduced by fakeTimeSlot.
func realTimeSlot() {
	fakeMutex.Lock()
	fakeNow = time.Time{}
	fakeOffset = 0
	fakeMutex.Unlock()
	logger.Infof("not faking presence time. Real time slot in use.")
}

func seqsC(base *mgo.Collection) *mgo.Collection {
	return base.Database.C(base.Name + ".seqs")
}

func beingsC(base *mgo.Collection) *mgo.Collection {
	return base.Database.C(base.Name + ".beings")
}

func pingsC(base *mgo.Collection) *mgo.Collection {
	return base.Database.C(base.Name + ".pings")
}