github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/state/watcher/watcher.go (about)

     1  // Copyright 2012, 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  // The watcher package provides an interface for observing changes
     5  // to arbitrary MongoDB documents that are maintained via the
     6  // mgo/txn transaction package.
     7  package watcher
     8  
     9  import (
    10  	"fmt"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/juju/errors"
    15  	"github.com/juju/loggo"
    16  	"gopkg.in/juju/worker.v1"
    17  	"gopkg.in/mgo.v2"
    18  	"gopkg.in/mgo.v2/bson"
    19  	"gopkg.in/tomb.v2"
    20  
    21  	"github.com/juju/juju/mongo"
    22  	jworker "github.com/juju/juju/worker"
    23  )
    24  
    25  // BaseWatcher represents watch methods on the worker
    26  // responsible for watching for database changes.
    27  type BaseWatcher interface {
    28  	worker.Worker
    29  
    30  	Dead() <-chan struct{}
    31  	Err() error
    32  
    33  	// Watch will send events on the Change channel whenever the document you
    34  	// are watching is changed. Note that in order to not miss any changes, you
    35  	// should start Watching the document before you read the document.
    36  	// At this low level Watch layer, there will not be an initial event.
    37  	// Instead, Watch is synchronous, the Watch will not return until the
    38  	// watcher is registered.
    39  	// TODO(jam): 2019-01-31 Update Watch() to return an error rather now
    40  	// that it is synchronous
    41  	Watch(collection string, id interface{}, ch chan<- Change)
    42  
    43  	// WatchMulti is similar to Watch, it just allows you to watch a set of
    44  	// documents in the same collection in one request. Just like Watch,
    45  	// no event will be sent for documents that don't change.
    46  	WatchMulti(collection string, ids []interface{}, ch chan<- Change) error
    47  
    48  	// WatchCollection will give an event if any documents are modified/added/removed
    49  	// from the collection.
    50  	// TODO(jam): 2019-01-31 Update WatchCollection() to return an error rather now
    51  	// that it is synchronous
    52  	WatchCollection(collection string, ch chan<- Change)
    53  
    54  	// WatchCollectionWithFilter will give an event if any documents are modified/added/removed
    55  	// from the collection. Filter can be supplied to check if a given document
    56  	// should send an event.
    57  	// TODO(jam): 2019-01-31 Update WatchCollectionWithFilter() to return an error rather now
    58  	// that it is synchronous
    59  	WatchCollectionWithFilter(collection string, ch chan<- Change, filter func(interface{}) bool)
    60  
    61  	// Unwatch is an asynchronous request to stop watching a given watch.
    62  	// It is an error to try to Unwatch something that is not being watched.
    63  	// Note that Unwatch can be called for things that have been registered with
    64  	// either Watch() or WatchMulti(). For WatchCollection or WatchCollectionWithFilter
    65  	// use UnwatchCollection.
    66  	// TODO(jam): 2019-01-31 Currently Unwatching something that isn't watched
    67  	// is a panic, should we make the method synchronous and turn it into an error?
    68  	// Or just turn it into a no-op
    69  	Unwatch(collection string, id interface{}, ch chan<- Change)
    70  
    71  	// UnwatchCollection is used when you are done with a watch started with
    72  	// either WatchCollection or WatchCollectionWithFilter. You must pass in
    73  	// the same Change channel. Unwatching a collection that isn't being watched
    74  	// is an error that will panic().
    75  	UnwatchCollection(collection string, ch chan<- Change)
    76  }
    77  
    78  var logger = loggo.GetLogger("juju.state.watcher")
    79  
    80  // A Watcher can watch any number of collections and documents for changes.
    81  type Watcher struct {
    82  	tomb         tomb.Tomb
    83  	iteratorFunc func() mongo.Iterator
    84  	log          *mgo.Collection
    85  
    86  	// watches holds the observers managed by Watch/Unwatch.
    87  	watches map[watchKey][]watchInfo
    88  
    89  	// needSync is set when a synchronization should take
    90  	// place.
    91  	needSync bool
    92  
    93  	// syncEvents and requestEvents contain the events to be
    94  	// dispatched to the watcher channels. They're queued during
    95  	// processing and flushed at the end to simplify the algorithm.
    96  	// The two queues are separated because events from sync are
    97  	// handled in reverse order due to the way the algorithm works.
    98  	syncEvents, requestEvents []event
    99  
   100  	// request is used to deliver requests from the public API into
   101  	// the the goroutine loop.
   102  	request chan interface{}
   103  
   104  	// lastId is the most recent transaction id observed by a sync.
   105  	lastId interface{}
   106  }
   107  
   108  // A Change holds information about a document change.
   109  type Change struct {
   110  	// C and Id hold the collection name and document _id field value.
   111  	C  string
   112  	Id interface{}
   113  
   114  	// Revno is the latest known value for the document's txn-revno
   115  	// field, or -1 if the document was deleted.
   116  	Revno int64
   117  }
   118  
   119  type watchKey struct {
   120  	c  string
   121  	id interface{} // nil when watching collection
   122  }
   123  
   124  func (k watchKey) String() string {
   125  	coll := fmt.Sprintf("collection %q", k.c)
   126  	if k.id == nil {
   127  		return coll
   128  	}
   129  	if s, ok := k.id.(string); ok {
   130  		return fmt.Sprintf("document %q in %s", s, coll)
   131  	}
   132  	return fmt.Sprintf("document %v in %s", k.id, coll)
   133  }
   134  
   135  // match returns whether the receiving watch key,
   136  // which may refer to a particular item or
   137  // an entire collection, matches k1, which refers
   138  // to a particular item.
   139  func (k watchKey) match(k1 watchKey) bool {
   140  	if k.c != k1.c {
   141  		return false
   142  	}
   143  	if k.id == nil {
   144  		// k refers to entire collection
   145  		return true
   146  	}
   147  	return k.id == k1.id
   148  }
   149  
   150  type watchInfo struct {
   151  	ch     chan<- Change
   152  	revno  int64
   153  	filter func(interface{}) bool
   154  }
   155  
   156  type event struct {
   157  	ch        chan<- Change
   158  	key       watchKey
   159  	isDeleted bool
   160  	revno     int64
   161  }
   162  
   163  // Period is the delay between each sync.
   164  // It must not be changed when any watchers are active.
   165  var Period time.Duration = 5 * time.Second
   166  
   167  // New returns a new Watcher observing the changelog collection,
   168  // which must be a capped collection maintained by mgo/txn.
   169  func New(changelog *mgo.Collection) *Watcher {
   170  	return newWatcher(changelog, nil)
   171  }
   172  
   173  func newWatcher(changelog *mgo.Collection, iteratorFunc func() mongo.Iterator) *Watcher {
   174  	w := &Watcher{
   175  		log:          changelog,
   176  		iteratorFunc: iteratorFunc,
   177  		watches:      make(map[watchKey][]watchInfo),
   178  		request:      make(chan interface{}),
   179  	}
   180  	if w.iteratorFunc == nil {
   181  		w.iteratorFunc = w.iter
   182  	}
   183  	w.tomb.Go(func() error {
   184  		err := w.loop(Period)
   185  		cause := errors.Cause(err)
   186  		// tomb expects ErrDying or ErrStillAlive as
   187  		// exact values, so we need to log and unwrap
   188  		// the error first.
   189  		if err != nil && cause != tomb.ErrDying {
   190  			logger.Infof("watcher loop failed: %v", err)
   191  		}
   192  		return cause
   193  	})
   194  	return w
   195  }
   196  
   197  // NewDead returns a new watcher that is already dead
   198  // and always returns the given error from its Err method.
   199  func NewDead(err error) *Watcher {
   200  	var w Watcher
   201  	w.tomb.Kill(errors.Trace(err))
   202  	return &w
   203  }
   204  
   205  // Kill is part of the worker.Worker interface.
   206  func (w *Watcher) Kill() {
   207  	w.tomb.Kill(nil)
   208  }
   209  
   210  // Wait is part of the worker.Worker interface.
   211  func (w *Watcher) Wait() error {
   212  	return w.tomb.Wait()
   213  }
   214  
   215  // Stop stops all the watcher activities.
   216  func (w *Watcher) Stop() error {
   217  	return worker.Stop(w)
   218  }
   219  
   220  // Dead returns a channel that is closed when the watcher has stopped.
   221  func (w *Watcher) Dead() <-chan struct{} {
   222  	return w.tomb.Dead()
   223  }
   224  
   225  // Err returns the error with which the watcher stopped.
   226  // It returns nil if the watcher stopped cleanly, tomb.ErrStillAlive
   227  // if the watcher is still running properly, or the respective error
   228  // if the watcher is terminating or has terminated with an error.
   229  func (w *Watcher) Err() error {
   230  	return w.tomb.Err()
   231  }
   232  
   233  type reqWatch struct {
   234  	key  watchKey
   235  	info watchInfo
   236  	// registeredCh is used to indicate when
   237  	registeredCh chan error
   238  }
   239  
   240  func (r reqWatch) Completed() chan error {
   241  	return r.registeredCh
   242  }
   243  
   244  type reqWatchMulti struct {
   245  	collection  string
   246  	ids         []interface{}
   247  	completedCh chan error
   248  	watchCh     chan<- Change
   249  }
   250  
   251  func (r reqWatchMulti) Completed() chan error {
   252  	return r.completedCh
   253  }
   254  
   255  type reqUnwatch struct {
   256  	key watchKey
   257  	ch  chan<- Change
   258  }
   259  
   260  type reqSync struct{}
   261  
   262  // waitableRequest represents a request that is made, and you wait for the core loop to acknowledge the request has been
   263  // received
   264  type waitableRequest interface {
   265  	// Completed returns the channel that the core loop will use to signal completion of the request.
   266  	Completed() chan error
   267  }
   268  
   269  func (w *Watcher) sendReq(req interface{}) {
   270  	select {
   271  	case w.request <- req:
   272  	case <-w.tomb.Dying():
   273  	}
   274  }
   275  
   276  func (w *Watcher) sendAndWaitReq(req waitableRequest) error {
   277  	select {
   278  	case w.request <- req:
   279  	case <-w.tomb.Dying():
   280  		return errors.Trace(tomb.ErrDying)
   281  	}
   282  	completed := req.Completed()
   283  	select {
   284  	case err := <-completed:
   285  		return errors.Trace(err)
   286  	case <-w.tomb.Dying():
   287  		return errors.Trace(tomb.ErrDying)
   288  	}
   289  }
   290  
   291  // Watchstarts watching the given collection and document id.
   292  // An event will be sent onto ch whenever a matching document's txn-revno
   293  // field is observed to change after a transaction is applied.
   294  func (w *Watcher) Watch(collection string, id interface{}, ch chan<- Change) {
   295  	if id == nil {
   296  		panic("watcher: cannot watch a document with nil id")
   297  	}
   298  	w.sendAndWaitReq(reqWatch{
   299  		key:          watchKey{collection, id},
   300  		info:         watchInfo{ch, -2, nil},
   301  		registeredCh: make(chan error),
   302  	})
   303  }
   304  
   305  func (w *Watcher) WatchMulti(collection string, ids []interface{}, ch chan<- Change) error {
   306  	for _, id := range ids {
   307  		if id == nil {
   308  			return errors.Errorf("cannot watch a document with nil id")
   309  		}
   310  	}
   311  	req := reqWatchMulti{
   312  		collection:  collection,
   313  		ids:         ids,
   314  		watchCh:     ch,
   315  		completedCh: make(chan error),
   316  	}
   317  	w.sendReq(req)
   318  	select {
   319  	case err := <-req.completedCh:
   320  		return errors.Trace(err)
   321  	case <-w.tomb.Dying():
   322  		return errors.Trace(tomb.ErrDying)
   323  	}
   324  }
   325  
   326  // WatchCollection starts watching the given collection.
   327  // An event will be sent onto ch whenever the txn-revno field is observed
   328  // to change after a transaction is applied for any document in the collection.
   329  func (w *Watcher) WatchCollection(collection string, ch chan<- Change) {
   330  	w.WatchCollectionWithFilter(collection, ch, nil)
   331  }
   332  
   333  // WatchCollectionWithFilter starts watching the given collection.
   334  // An event will be sent onto ch whenever the txn-revno field is observed
   335  // to change after a transaction is applied for any document in the collection, so long as the
   336  // specified filter function returns true when called with the document id value.
   337  func (w *Watcher) WatchCollectionWithFilter(collection string, ch chan<- Change, filter func(interface{}) bool) {
   338  	w.sendAndWaitReq(reqWatch{
   339  		key:          watchKey{collection, nil},
   340  		info:         watchInfo{ch, 0, filter},
   341  		registeredCh: make(chan error),
   342  	})
   343  }
   344  
   345  // Unwatch stops watching the given collection and document id via ch.
   346  func (w *Watcher) Unwatch(collection string, id interface{}, ch chan<- Change) {
   347  	if id == nil {
   348  		panic("watcher: cannot unwatch a document with nil id")
   349  	}
   350  	w.sendReq(reqUnwatch{watchKey{collection, id}, ch})
   351  }
   352  
   353  // UnwatchCollection stops watching the given collection via ch.
   354  func (w *Watcher) UnwatchCollection(collection string, ch chan<- Change) {
   355  	w.sendReq(reqUnwatch{watchKey{collection, nil}, ch})
   356  }
   357  
   358  // StartSync forces the watcher to load new events from the database.
   359  func (w *Watcher) StartSync() {
   360  	w.sendReq(reqSync{})
   361  }
   362  
   363  // loop implements the main watcher loop.
   364  // period is the delay between each sync.
   365  func (w *Watcher) loop(period time.Duration) error {
   366  	next := time.After(period)
   367  	w.needSync = true
   368  	if err := w.initLastId(); err != nil {
   369  		return errors.Trace(err)
   370  	}
   371  	for {
   372  		if w.needSync {
   373  			if err := w.sync(); err != nil {
   374  				// If the txn log collection overflows from underneath us,
   375  				// the easiest cause of action to recover is to cause the
   376  				// agen tto restart.
   377  				if errors.Cause(err) == cappedPositionLostError {
   378  					// Ideally we'd not import the worker package but that's
   379  					// where all the errors are defined.
   380  					return jworker.ErrRestartAgent
   381  				}
   382  				return errors.Trace(err)
   383  			}
   384  			w.flush()
   385  			next = time.After(period)
   386  		}
   387  		select {
   388  		case <-w.tomb.Dying():
   389  			return errors.Trace(tomb.ErrDying)
   390  		case <-next:
   391  			next = time.After(period)
   392  			w.needSync = true
   393  		case req := <-w.request:
   394  			w.handle(req)
   395  			w.flush()
   396  		}
   397  	}
   398  }
   399  
   400  // flush sends all pending events to their respective channels.
   401  func (w *Watcher) flush() {
   402  	// refreshEvents are stored newest first.
   403  	for i := len(w.syncEvents) - 1; i >= 0; i-- {
   404  		e := &w.syncEvents[i]
   405  		for e.ch != nil {
   406  			change := Change{
   407  				C:     e.key.c,
   408  				Id:    e.key.id,
   409  				Revno: e.revno,
   410  			}
   411  			select {
   412  			case <-w.tomb.Dying():
   413  				return
   414  			case req := <-w.request:
   415  				w.handle(req)
   416  				continue
   417  			case e.ch <- change:
   418  			}
   419  			break
   420  		}
   421  	}
   422  	// requestEvents are stored oldest first, and
   423  	// may grow during the loop.
   424  	for i := 0; i < len(w.requestEvents); i++ {
   425  		e := &w.requestEvents[i]
   426  		for e.ch != nil {
   427  			change := Change{
   428  				C:     e.key.c,
   429  				Id:    e.key.id,
   430  				Revno: e.revno,
   431  			}
   432  			select {
   433  			case <-w.tomb.Dying():
   434  				return
   435  			case req := <-w.request:
   436  				w.handle(req)
   437  				continue
   438  			case e.ch <- change:
   439  			}
   440  			break
   441  		}
   442  	}
   443  	w.syncEvents = w.syncEvents[:0]
   444  	w.requestEvents = w.requestEvents[:0]
   445  }
   446  
   447  // handle deals with requests delivered by the public API
   448  // onto the background watcher goroutine.
   449  func (w *Watcher) handle(req interface{}) {
   450  	logger.Tracef("got request: %#v", req)
   451  	switch r := req.(type) {
   452  	case reqSync:
   453  		w.needSync = true
   454  	case reqWatch:
   455  		for _, info := range w.watches[r.key] {
   456  			if info.ch == r.info.ch {
   457  				panic(fmt.Errorf("tried to re-add channel %v for %s", info.ch, r.key))
   458  			}
   459  		}
   460  		w.watches[r.key] = append(w.watches[r.key], r.info)
   461  		if r.registeredCh != nil {
   462  			select {
   463  			case r.registeredCh <- nil:
   464  			case <-w.tomb.Dying():
   465  			}
   466  		}
   467  	case reqUnwatch:
   468  		watches := w.watches[r.key]
   469  		removed := false
   470  		for i, info := range watches {
   471  			if info.ch == r.ch {
   472  				watches[i] = watches[len(watches)-1]
   473  				w.watches[r.key] = watches[:len(watches)-1]
   474  				removed = true
   475  				break
   476  			}
   477  		}
   478  		if !removed {
   479  			panic(fmt.Errorf("tried to remove missing channel %v for %s", r.ch, r.key))
   480  		}
   481  		for i := range w.requestEvents {
   482  			e := &w.requestEvents[i]
   483  			if r.key.match(e.key) && e.ch == r.ch {
   484  				e.ch = nil
   485  			}
   486  		}
   487  		for i := range w.syncEvents {
   488  			e := &w.syncEvents[i]
   489  			if r.key.match(e.key) && e.ch == r.ch {
   490  				e.ch = nil
   491  			}
   492  		}
   493  	case reqWatchMulti:
   494  		for _, id := range r.ids {
   495  			key := watchKey{c: r.collection, id: id}
   496  			for _, info := range w.watches[key] {
   497  				if info.ch == r.watchCh {
   498  					err := errors.Errorf("tried to re-add channel %v for %s", info.ch, key)
   499  					select {
   500  					case r.completedCh <- err:
   501  					case <-w.tomb.Dying():
   502  					}
   503  					return
   504  				}
   505  			}
   506  		}
   507  		for _, id := range r.ids {
   508  			key := watchKey{c: r.collection, id: id}
   509  			w.watches[key] = append(w.watches[key], watchInfo{ch: r.watchCh, revno: -2, filter: nil})
   510  		}
   511  		select {
   512  		case r.completedCh <- nil:
   513  		case <-w.tomb.Dying():
   514  		}
   515  	default:
   516  		panic(fmt.Errorf("unknown request: %T", req))
   517  	}
   518  }
   519  
   520  // initLastId reads the most recent changelog document and initializes
   521  // lastId with it. This causes all history that precedes the creation
   522  // of the watcher to be ignored.
   523  func (w *Watcher) initLastId() error {
   524  	var entry struct {
   525  		Id interface{} `bson:"_id"`
   526  	}
   527  	err := w.log.Find(nil).Sort("-$natural").One(&entry)
   528  	if err != nil && err != mgo.ErrNotFound {
   529  		return errors.Trace(err)
   530  	}
   531  	w.lastId = entry.Id
   532  	return nil
   533  }
   534  
   535  func (w *Watcher) iter() mongo.Iterator {
   536  	return w.log.Find(nil).Batch(10).Sort("-$natural").Iter()
   537  }
   538  
   539  var cappedPositionLostError = errors.New("capped position lost")
   540  
   541  // sync updates the watcher knowledge from the database, and
   542  // queues events to observing channels.
   543  func (w *Watcher) sync() error {
   544  	w.needSync = false
   545  	// Iterate through log events in reverse insertion order (newest first).
   546  	iter := w.iteratorFunc()
   547  	seen := make(map[watchKey]bool)
   548  	first := true
   549  	lastId := w.lastId
   550  	var entry bson.D
   551  	for iter.Next(&entry) {
   552  		if len(entry) == 0 {
   553  			logger.Tracef("got empty changelog document")
   554  		}
   555  		id := entry[0]
   556  		if id.Name != "_id" {
   557  			panic("watcher: _id field isn't first entry")
   558  		}
   559  		if first {
   560  			w.lastId = id.Value
   561  			first = false
   562  		}
   563  		if id.Value == lastId {
   564  			break
   565  		}
   566  		logger.Tracef("got changelog document: %#v", entry)
   567  		for _, c := range entry[1:] {
   568  			// See txn's Runner.ChangeLog for the structure of log entries.
   569  			var d, r []interface{}
   570  			dr, _ := c.Value.(bson.D)
   571  			for _, item := range dr {
   572  				switch item.Name {
   573  				case "d":
   574  					d, _ = item.Value.([]interface{})
   575  				case "r":
   576  					r, _ = item.Value.([]interface{})
   577  				}
   578  			}
   579  			if len(d) == 0 || len(d) != len(r) {
   580  				logger.Warningf("changelog has invalid collection document: %#v", c)
   581  				continue
   582  			}
   583  			for i := len(d) - 1; i >= 0; i-- {
   584  				key := watchKey{c.Name, d[i]}
   585  				if seen[key] {
   586  					continue
   587  				}
   588  				seen[key] = true
   589  				revno, ok := r[i].(int64)
   590  				if !ok {
   591  					logger.Warningf("changelog has revno with type %T: %#v", r[i], r[i])
   592  					continue
   593  				}
   594  				if revno < 0 {
   595  					revno = -1
   596  				}
   597  				// Queue notifications for per-collection watches.
   598  				for _, info := range w.watches[watchKey{c.Name, nil}] {
   599  					if info.filter != nil && !info.filter(d[i]) {
   600  						continue
   601  					}
   602  					evt := event{
   603  						ch:        info.ch,
   604  						key:       key,
   605  						isDeleted: revno == -1,
   606  						revno:     revno,
   607  					}
   608  					w.syncEvents = append(w.syncEvents, evt)
   609  				}
   610  				// Queue notifications for per-document watches.
   611  				infos := w.watches[key]
   612  				for i, info := range infos {
   613  					if revno > info.revno || revno < 0 && info.revno >= 0 {
   614  						infos[i].revno = revno
   615  						evt := event{
   616  							ch:        info.ch,
   617  							key:       key,
   618  							isDeleted: revno == -1,
   619  							revno:     revno,
   620  						}
   621  						w.syncEvents = append(w.syncEvents, evt)
   622  					}
   623  				}
   624  			}
   625  		}
   626  	}
   627  	if err := iter.Close(); err != nil {
   628  		if qerr, ok := err.(*mgo.QueryError); ok {
   629  			// CappedPositionLost is code 136.
   630  			// Just in case that changes for some reason, we'll also check the error message.
   631  			if qerr.Code == 136 || strings.Contains(qerr.Message, "CappedPositionLost") {
   632  				logger.Warningf("watcher iterator failed due to txn log collection overflow")
   633  				err = cappedPositionLostError
   634  			}
   635  		}
   636  		return errors.Annotate(err, "watcher iteration error")
   637  	}
   638  	return nil
   639  }