github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/mongo/oplog.go (about)

     1  // Copyright 2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package mongo
     5  
     6  import (
     7  	"reflect"
     8  	"time"
     9  
    10  	"github.com/juju/errors"
    11  	"github.com/juju/mgo/v3"
    12  	"github.com/juju/mgo/v3/bson"
    13  	"gopkg.in/tomb.v2"
    14  )
    15  
    16  // OplogDoc represents a document in the oplog.rs collection.
    17  // See: http://www.kchodorow.com/blog/2010/10/12/replication-internals/
    18  //
    19  // The Object and UpdateObject fields are returned raw to allow
    20  // unmarshalling into arbitrary types. Use the UnmarshalObject and
    21  // UnmarshalUpdate methods to unmarshall these fields.
    22  type OplogDoc struct {
    23  	Timestamp    bson.MongoTimestamp `bson:"ts"`
    24  	OperationId  int64               `bson:"h"`
    25  	MongoVersion int                 `bson:"v"`
    26  	Operation    string              `bson:"op"` // "i" - insert, "u" - update, "d" - delete
    27  	Namespace    string              `bson:"ns"`
    28  	Object       *bson.Raw           `bson:"o"`
    29  	UpdateObject *bson.Raw           `bson:"o2"`
    30  }
    31  
    32  // UnmarshalObject unmarshals the Object field into out. The out
    33  // argument should be a pointer or a suitable map.
    34  func (d *OplogDoc) UnmarshalObject(out interface{}) error {
    35  	return d.unmarshal(d.Object, out)
    36  }
    37  
    38  // UnmarshalUpdate unmarshals the UpdateObject field into out. The out
    39  // argument should be a pointer or a suitable map.
    40  func (d *OplogDoc) UnmarshalUpdate(out interface{}) error {
    41  	return d.unmarshal(d.UpdateObject, out)
    42  }
    43  
    44  func (d *OplogDoc) unmarshal(raw *bson.Raw, out interface{}) error {
    45  	if raw == nil {
    46  		// If the field is not set, set out to the zero value for its type.
    47  		v := reflect.ValueOf(out)
    48  		switch v.Kind() {
    49  		case reflect.Ptr:
    50  			v = v.Elem()
    51  			v.Set(reflect.Zero(v.Type()))
    52  		case reflect.Map:
    53  			// Empty the map.
    54  			for _, k := range v.MapKeys() {
    55  				v.SetMapIndex(k, reflect.Value{})
    56  			}
    57  		default:
    58  			return errors.New("output must be a pointer or map")
    59  		}
    60  		return nil
    61  	}
    62  	return raw.Unmarshal(out)
    63  }
    64  
    65  // NewMongoTimestamp returns a bson.MongoTimestamp repesentation for
    66  // the time.Time given. Note that these timestamps are not the same
    67  // the usual MongoDB time fields. These are an internal format used
    68  // only in a few places such as the replication oplog.
    69  //
    70  // See: http://docs.mongodb.org/manual/reference/bson-types/#timestamps
    71  func NewMongoTimestamp(t time.Time) bson.MongoTimestamp {
    72  	unixTime := t.Unix()
    73  	if unixTime < 0 {
    74  		unixTime = 0
    75  	}
    76  	return bson.MongoTimestamp(unixTime << 32)
    77  }
    78  
    79  // GetOplog returns the the oplog collection in the local database.
    80  func GetOplog(session *mgo.Session) *mgo.Collection {
    81  	return session.DB("local").C("oplog.rs")
    82  }
    83  
    84  func isRealOplog(c *mgo.Collection) bool {
    85  	return c.Database.Name == "local" && c.Name == "oplog.rs"
    86  }
    87  
    88  // OplogSession represents a connection to the oplog store, used
    89  // to create an iterator to get oplog documents (and recreate it if it
    90  // gets killed or times out).
    91  type OplogSession interface {
    92  	NewIter(bson.MongoTimestamp, []int64) Iterator
    93  	Close()
    94  }
    95  
    96  type oplogSession struct {
    97  	session    *mgo.Session
    98  	collection *mgo.Collection
    99  	query      bson.D
   100  }
   101  
   102  // NewOplogSession defines a new OplogSession.
   103  //
   104  // Arguments:
   105  //   - "collection" is the collection to use for the oplog. Typically this
   106  //     would be the result of GetOpLog.
   107  //   - "query" can be used to limit the returned oplog entries. A
   108  //     typical filter would limit based on ns ("<database>.<collection>")
   109  //     and o (object).
   110  //
   111  // The returned session should be `Close`d when it's no longer needed.
   112  func NewOplogSession(collection *mgo.Collection, query bson.D) *oplogSession {
   113  	// Use a fresh session for the tailer.
   114  	session := collection.Database.Session.Copy()
   115  	return &oplogSession{
   116  		session:    session,
   117  		collection: collection.With(session),
   118  		query:      query,
   119  	}
   120  }
   121  
   122  const oplogTailTimeout = time.Second
   123  
   124  func (s *oplogSession) NewIter(fromTimestamp bson.MongoTimestamp, excludeIds []int64) Iterator {
   125  	// When recreating the iterator (required when the cursor
   126  	// is invalidated) avoid reporting oplog entries that have
   127  	// already been reported.
   128  	sel := append(s.query,
   129  		bson.DocElem{"ts", bson.D{{"$gte", fromTimestamp}}},
   130  		bson.DocElem{"h", bson.D{{"$nin", excludeIds}}},
   131  	)
   132  
   133  	query := s.collection.Find(sel)
   134  	if isRealOplog(s.collection) {
   135  		// Apply an optimisation that is only supported with
   136  		// the real oplog.
   137  		query = query.LogReplay()
   138  	}
   139  
   140  	// Time the tail call out every second so that requests to
   141  	// stop can be honoured.
   142  	return query.Tail(oplogTailTimeout)
   143  }
   144  
   145  func (s *oplogSession) Close() {
   146  	s.session.Close()
   147  }
   148  
   149  // NewOplogTailer returns a new OplogTailer.
   150  //
   151  // Arguments:
   152  //   - "session" determines the collection and filtering on records that
   153  //     should be returned.
   154  //   - "initialTs" sets the operation timestamp to start returning
   155  //     results from. This can be used to avoid an expensive initial search
   156  //     through the oplog when the tailer first starts.
   157  //
   158  // Remember to call Stop on the returned OplogTailer when it is no
   159  // longer needed.
   160  func NewOplogTailer(
   161  	session OplogSession,
   162  	initialTs time.Time,
   163  ) *OplogTailer {
   164  	t := &OplogTailer{
   165  		session:   session,
   166  		initialTs: NewMongoTimestamp(initialTs),
   167  		outCh:     make(chan *OplogDoc),
   168  	}
   169  	t.tomb.Go(func() error {
   170  		defer func() {
   171  			close(t.outCh)
   172  			session.Close()
   173  		}()
   174  		return t.loop()
   175  	})
   176  	return t
   177  }
   178  
   179  // OplogTailer tails MongoDB's replication oplog.
   180  type OplogTailer struct {
   181  	tomb      tomb.Tomb
   182  	session   OplogSession
   183  	initialTs bson.MongoTimestamp
   184  	outCh     chan *OplogDoc
   185  }
   186  
   187  // Out returns a channel that reports the oplog entries matching the
   188  // query passed to NewOplogTailer as they appear.
   189  func (t *OplogTailer) Out() <-chan *OplogDoc {
   190  	return t.outCh
   191  }
   192  
   193  // Dying returns a channel that will be closed with the OplogTailer is
   194  // shutting down.
   195  func (t *OplogTailer) Dying() <-chan struct{} {
   196  	return t.tomb.Dying()
   197  }
   198  
   199  // Stop shuts down the OplogTailer. It will block until shutdown is
   200  // complete.
   201  func (t *OplogTailer) Stop() error {
   202  	t.tomb.Kill(nil)
   203  	return t.tomb.Wait()
   204  }
   205  
   206  // Err returns the error that caused the OplogTailer to stop. If it
   207  // finished normally or hasn't stopped then nil will be returned.
   208  func (t *OplogTailer) Err() error {
   209  	return t.tomb.Err()
   210  }
   211  
   212  func (t *OplogTailer) loop() error {
   213  	// lastTimestamp tracks the most recent oplog timestamp reported.
   214  	lastTimestamp := t.initialTs
   215  
   216  	// idsForLastTimestamp records the unique operation ids that have
   217  	// been reported for the most recently reported oplog
   218  	// timestamp. This is used to avoid re-reporting oplog entries
   219  	// when the iterator is restarted. These timestamps are unique for
   220  	// a given mongod but when there's multiple replicaset members
   221  	// it's possible for there to be multiple oplog entries for a
   222  	// given timestamp.
   223  	//
   224  	// See: http://docs.mongodb.org/v2.4/reference/bson-types/#timestamps
   225  	var idsForLastTimestamp []int64
   226  
   227  	newIter := func() Iterator {
   228  		return t.session.NewIter(lastTimestamp, idsForLastTimestamp)
   229  	}
   230  	iter := newIter()
   231  	defer func() { iter.Close() }() // iter may be replaced, hence closure
   232  
   233  	for {
   234  		if t.dying() {
   235  			return tomb.ErrDying
   236  		}
   237  		var doc OplogDoc
   238  		if iter.Next(&doc) {
   239  			select {
   240  			case <-t.tomb.Dying():
   241  				return tomb.ErrDying
   242  			case t.outCh <- &doc:
   243  			}
   244  			if doc.Timestamp > lastTimestamp {
   245  				lastTimestamp = doc.Timestamp
   246  				idsForLastTimestamp = nil
   247  			}
   248  			idsForLastTimestamp = append(idsForLastTimestamp, doc.OperationId)
   249  		} else {
   250  			if iter.Timeout() {
   251  				continue
   252  			}
   253  			if err := iter.Close(); err != nil && err != mgo.ErrCursor {
   254  				return err
   255  			}
   256  			// Either there's no error or the error is an expired
   257  			// cursor; Recreate the iterator.
   258  			iter = newIter()
   259  		}
   260  	}
   261  }
   262  
   263  func (t *OplogTailer) dying() bool {
   264  	select {
   265  	case <-t.tomb.Dying():
   266  		return true
   267  	default:
   268  		return false
   269  	}
   270  }