github.com/mhilton/juju-juju@v0.0.0-20150901100907-a94dd2c73455/mongo/oplog.go (about)

     1  // Copyright 2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package mongo
     5  
     6  import (
     7  	"reflect"
     8  	"time"
     9  
    10  	"github.com/juju/errors"
    11  	"gopkg.in/mgo.v2"
    12  	"gopkg.in/mgo.v2/bson"
    13  	"launchpad.net/tomb"
    14  )
    15  
// OplogDoc represents a document in the oplog.rs collection.
// See: http://www.kchodorow.com/blog/2010/10/12/replication-internals/
//
// The Object and UpdateObject fields are returned raw to allow
// unmarshalling into arbitrary types. Use the UnmarshalObject and
// UnmarshalUpdate methods to unmarshal these fields.
type OplogDoc struct {
	// Timestamp is the "ts" field of the oplog entry. The tailer uses
	// it to track how far through the oplog it has reported.
	Timestamp    bson.MongoTimestamp `bson:"ts"`
	// OperationId is the "h" field of the oplog entry. The tailer uses
	// it to tell apart entries that share the same Timestamp.
	OperationId  int64               `bson:"h"`
	// MongoVersion is the "v" field of the oplog entry.
	// NOTE(review): presumably the oplog entry format version - confirm.
	MongoVersion int                 `bson:"v"`
	// Operation is the "op" field: the kind of operation recorded.
	Operation    string              `bson:"op"` // "i" - insert, "u" - update, "d" - delete
	// Namespace is the "ns" field, of the form "<database>.<collection>".
	Namespace    string              `bson:"ns"`
	// Object is the raw "o" field. It is nil when the field is not set;
	// decode it with UnmarshalObject.
	Object       *bson.Raw           `bson:"o"`
	// UpdateObject is the raw "o2" field. It is nil when the field is
	// not set; decode it with UnmarshalUpdate.
	UpdateObject *bson.Raw           `bson:"o2"`
}
    31  
// UnmarshalObject unmarshals the Object field (the "o" field of the
// oplog entry) into out. The out argument should be a pointer or a
// suitable map.
//
// If the Object field is not set, out is reset instead of being left
// untouched: a pointer target is set to the zero value for its type
// and a map is emptied.
func (d *OplogDoc) UnmarshalObject(out interface{}) error {
	return d.unmarshal(d.Object, out)
}
    37  
// UnmarshalUpdate unmarshals the UpdateObject field (the "o2" field of
// the oplog entry) into out. The out argument should be a pointer or a
// suitable map.
//
// If the UpdateObject field is not set, out is reset instead of being
// left untouched: a pointer target is set to the zero value for its
// type and a map is emptied.
func (d *OplogDoc) UnmarshalUpdate(out interface{}) error {
	return d.unmarshal(d.UpdateObject, out)
}
    43  
    44  func (d *OplogDoc) unmarshal(raw *bson.Raw, out interface{}) error {
    45  	if raw == nil {
    46  		// If the field is not set, set out to the zero value for its type.
    47  		v := reflect.ValueOf(out)
    48  		switch v.Kind() {
    49  		case reflect.Ptr:
    50  			v = v.Elem()
    51  			v.Set(reflect.Zero(v.Type()))
    52  		case reflect.Map:
    53  			// Empty the map.
    54  			for _, k := range v.MapKeys() {
    55  				v.SetMapIndex(k, reflect.Value{})
    56  			}
    57  		default:
    58  			return errors.New("output must be a pointer or map")
    59  		}
    60  		return nil
    61  	}
    62  	return raw.Unmarshal(out)
    63  }
    64  
    65  // NewMongoTimestamp returns a bson.MongoTimestamp repesentation for
    66  // the time.Time given. Note that these timestamps are not the same
    67  // the usual MongoDB time fields. These are an internal format used
    68  // only in a few places such as the replication oplog.
    69  //
    70  // See: http://docs.mongodb.org/manual/reference/bson-types/#timestamps
    71  func NewMongoTimestamp(t time.Time) bson.MongoTimestamp {
    72  	unixTime := t.Unix()
    73  	if unixTime < 0 {
    74  		unixTime = 0
    75  	}
    76  	return bson.MongoTimestamp(unixTime << 32)
    77  }
    78  
// GetOplog returns the oplog collection ("oplog.rs") in the "local"
// database, accessed through the given session.
func GetOplog(session *mgo.Session) *mgo.Collection {
	return session.DB("local").C("oplog.rs")
}
    83  
    84  // NewOplogTailer returns a new OplogTailer.
    85  //
    86  // Arguments:
    87  // - "oplog" is the collection to use for the oplog. Typically this
    88  //   would be the result of GetOpLog.
    89  // - "query" can be used to limit the returned oplog entries. A
    90  //    typical filter would limit based on ns ("<database>.<collection>")
    91  //    and o (object).
    92  // - "initialTs" sets the operation timestamp to start returning
    93  //    results from. This can be used to avoid an expensive initial search
    94  //    through the oplog when the tailer first starts.
    95  //
    96  // Remember to call Stop on the returned OplogTailer when it is no
    97  // longer needed.
    98  func NewOplogTailer(
    99  	oplog *mgo.Collection,
   100  	query bson.D,
   101  	initialTs time.Time,
   102  ) *OplogTailer {
   103  	// Use a fresh session for the tailer.
   104  	session := oplog.Database.Session.Copy()
   105  	t := &OplogTailer{
   106  		oplog:     oplog.With(session),
   107  		query:     query,
   108  		initialTs: NewMongoTimestamp(initialTs),
   109  		outCh:     make(chan *OplogDoc),
   110  	}
   111  	go func() {
   112  		defer func() {
   113  			close(t.outCh)
   114  			t.tomb.Done()
   115  			session.Close()
   116  		}()
   117  		t.tomb.Kill(t.loop())
   118  	}()
   119  	return t
   120  }
   121  
// OplogTailer tails MongoDB's replication oplog. Create one with
// NewOplogTailer, receive entries from Out and call Stop when done.
type OplogTailer struct {
	// tomb tracks the lifecycle of the tailer's goroutine.
	tomb      tomb.Tomb
	// oplog is the collection being tailed, bound to the tailer's own
	// copy of the session.
	oplog     *mgo.Collection
	// query holds the caller-supplied filter for oplog entries.
	query     bson.D
	// initialTs is the oplog timestamp to start reporting entries from.
	initialTs bson.MongoTimestamp
	// outCh is the unbuffered channel on which matching entries are
	// delivered. It is closed when the tailer's goroutine exits.
	outCh     chan *OplogDoc
}
   130  
// Out returns a channel that reports the oplog entries matching the
// query passed to NewOplogTailer as they appear. The channel is
// closed when the tailer's goroutine exits.
func (t *OplogTailer) Out() <-chan *OplogDoc {
	return t.outCh
}
   136  
// Dying returns a channel that will be closed when the OplogTailer is
// shutting down. It is the Dying channel of the tailer's tomb.
func (t *OplogTailer) Dying() <-chan struct{} {
	return t.tomb.Dying()
}
   142  
// Stop shuts down the OplogTailer. It will block until shutdown is
// complete.
//
// It asks the tailer's tomb to die with a nil reason and returns the
// result of waiting on the tomb.
func (t *OplogTailer) Stop() error {
	t.tomb.Kill(nil)
	return t.tomb.Wait()
}
   149  
// Err returns the error that caused the OplogTailer to stop, as
// reported by the tailer's tomb. If it finished normally then nil
// will be returned.
//
// NOTE(review): the launchpad.net/tomb documentation says Tomb.Err
// returns tomb.ErrStillAlive while the goroutine is still alive, so
// this may not return nil before the tailer has stopped - confirm.
func (t *OplogTailer) Err() error {
	return t.tomb.Err()
}
   155  
// oplogTailTimeout is the timeout given to the tailing query in
// OplogTailer.loop. The tail call times out this often so that
// requests to stop the tailer can be honoured.
const oplogTailTimeout = time.Second
   157  
   158  func (t *OplogTailer) loop() error {
   159  	var iter *mgo.Iter
   160  
   161  	// lastTimestamp tracks the most recent oplog timestamp reported.
   162  	lastTimestamp := t.initialTs
   163  
   164  	// idsForLastTimestamp records the unique operation ids that have
   165  	// been reported for the most recently reported oplog
   166  	// timestamp. This is used to avoid re-reporting oplog entries
   167  	// when the iterator is restarted. These timestamps are unique for
   168  	// a given mongod but when there's multiple replicaset members
   169  	// it's possible for there to be multiple oplog entries for a
   170  	// given timestamp.
   171  	//
   172  	// See: http://docs.mongodb.org/v2.4/reference/bson-types/#timestamps
   173  	var idsForLastTimestamp []int64
   174  
   175  	for {
   176  		if t.dying() {
   177  			return tomb.ErrDying
   178  		}
   179  
   180  		if iter == nil {
   181  			// When recreating the iterator (required when the cursor
   182  			// is invalidated) avoid reporting oplog entries that have
   183  			// already been reported.
   184  			sel := append(t.query,
   185  				bson.DocElem{"ts", bson.D{{"$gte", lastTimestamp}}},
   186  				bson.DocElem{"h", bson.D{{"$nin", idsForLastTimestamp}}},
   187  			)
   188  			// Time the tail call out every second so that requests to
   189  			// stop can be honoured.
   190  			//
   191  			// TODO(mjs): Ideally -1 (no timeout) could be used here,
   192  			// with session.Close() being used to unblock Next() if
   193  			// the tailer should stop (these semantics are hinted at
   194  			// by the mgo docs). Unfortunately this can trigger
   195  			// panics. See: https://github.com/go-mgo/mgo/issues/121
   196  			query := t.oplog.Find(sel)
   197  			if isRealOplog(t.oplog) {
   198  				// Apply an optmisation that is only supported with
   199  				// the real oplog.
   200  				query = query.LogReplay()
   201  			}
   202  			iter = query.Tail(oplogTailTimeout)
   203  		}
   204  
   205  		var doc OplogDoc
   206  		if iter.Next(&doc) {
   207  			select {
   208  			case <-t.tomb.Dying():
   209  				return tomb.ErrDying
   210  			case t.outCh <- &doc:
   211  			}
   212  
   213  			if doc.Timestamp > lastTimestamp {
   214  				lastTimestamp = doc.Timestamp
   215  				idsForLastTimestamp = nil
   216  			}
   217  			idsForLastTimestamp = append(idsForLastTimestamp, doc.OperationId)
   218  		} else {
   219  			if err := iter.Err(); err != nil && err != mgo.ErrCursor {
   220  				return err
   221  			}
   222  			if iter.Timeout() {
   223  				continue
   224  			}
   225  			// No timeout and no error so cursor must have
   226  			// expired. Force it to be recreated next loop by marking
   227  			// it as nil.
   228  			iter = nil
   229  		}
   230  	}
   231  }
   232  
   233  func isRealOplog(c *mgo.Collection) bool {
   234  	return c.Database.Name == "local" && c.Name == "oplog.rs"
   235  }
   236  
   237  func (t *OplogTailer) dying() bool {
   238  	select {
   239  	case <-t.tomb.Dying():
   240  		return true
   241  	default:
   242  		return false
   243  	}
   244  }