github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/mongo/oplog.go (about) 1 // Copyright 2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package mongo 5 6 import ( 7 "reflect" 8 "time" 9 10 "github.com/juju/errors" 11 "github.com/juju/mgo/v3" 12 "github.com/juju/mgo/v3/bson" 13 "gopkg.in/tomb.v2" 14 ) 15 16 // OplogDoc represents a document in the oplog.rs collection. 17 // See: http://www.kchodorow.com/blog/2010/10/12/replication-internals/ 18 // 19 // The Object and UpdateObject fields are returned raw to allow 20 // unmarshalling into arbitrary types. Use the UnmarshalObject and 21 // UnmarshalUpdate methods to unmarshall these fields. 22 type OplogDoc struct { 23 Timestamp bson.MongoTimestamp `bson:"ts"` 24 OperationId int64 `bson:"h"` 25 MongoVersion int `bson:"v"` 26 Operation string `bson:"op"` // "i" - insert, "u" - update, "d" - delete 27 Namespace string `bson:"ns"` 28 Object *bson.Raw `bson:"o"` 29 UpdateObject *bson.Raw `bson:"o2"` 30 } 31 32 // UnmarshalObject unmarshals the Object field into out. The out 33 // argument should be a pointer or a suitable map. 34 func (d *OplogDoc) UnmarshalObject(out interface{}) error { 35 return d.unmarshal(d.Object, out) 36 } 37 38 // UnmarshalUpdate unmarshals the UpdateObject field into out. The out 39 // argument should be a pointer or a suitable map. 40 func (d *OplogDoc) UnmarshalUpdate(out interface{}) error { 41 return d.unmarshal(d.UpdateObject, out) 42 } 43 44 func (d *OplogDoc) unmarshal(raw *bson.Raw, out interface{}) error { 45 if raw == nil { 46 // If the field is not set, set out to the zero value for its type. 47 v := reflect.ValueOf(out) 48 switch v.Kind() { 49 case reflect.Ptr: 50 v = v.Elem() 51 v.Set(reflect.Zero(v.Type())) 52 case reflect.Map: 53 // Empty the map. 54 for _, k := range v.MapKeys() { 55 v.SetMapIndex(k, reflect.Value{}) 56 } 57 default: 58 return errors.New("output must be a pointer or map") 59 } 60 return nil 61 } 62 return raw.Unmarshal(out) 63 } 64 65 // NewMongoTimestamp returns a bson.MongoTimestamp repesentation for 66 // the time.Time given. Note that these timestamps are not the same 67 // the usual MongoDB time fields. These are an internal format used 68 // only in a few places such as the replication oplog. 69 // 70 // See: http://docs.mongodb.org/manual/reference/bson-types/#timestamps 71 func NewMongoTimestamp(t time.Time) bson.MongoTimestamp { 72 unixTime := t.Unix() 73 if unixTime < 0 { 74 unixTime = 0 75 } 76 return bson.MongoTimestamp(unixTime << 32) 77 } 78 79 // GetOplog returns the the oplog collection in the local database. 80 func GetOplog(session *mgo.Session) *mgo.Collection { 81 return session.DB("local").C("oplog.rs") 82 } 83 84 func isRealOplog(c *mgo.Collection) bool { 85 return c.Database.Name == "local" && c.Name == "oplog.rs" 86 } 87 88 // OplogSession represents a connection to the oplog store, used 89 // to create an iterator to get oplog documents (and recreate it if it 90 // gets killed or times out). 91 type OplogSession interface { 92 NewIter(bson.MongoTimestamp, []int64) Iterator 93 Close() 94 } 95 96 type oplogSession struct { 97 session *mgo.Session 98 collection *mgo.Collection 99 query bson.D 100 } 101 102 // NewOplogSession defines a new OplogSession. 103 // 104 // Arguments: 105 // - "collection" is the collection to use for the oplog. Typically this 106 // would be the result of GetOpLog. 107 // - "query" can be used to limit the returned oplog entries. A 108 // typical filter would limit based on ns ("<database>.<collection>") 109 // and o (object). 110 // 111 // The returned session should be `Close`d when it's no longer needed. 112 func NewOplogSession(collection *mgo.Collection, query bson.D) *oplogSession { 113 // Use a fresh session for the tailer. 114 session := collection.Database.Session.Copy() 115 return &oplogSession{ 116 session: session, 117 collection: collection.With(session), 118 query: query, 119 } 120 } 121 122 const oplogTailTimeout = time.Second 123 124 func (s *oplogSession) NewIter(fromTimestamp bson.MongoTimestamp, excludeIds []int64) Iterator { 125 // When recreating the iterator (required when the cursor 126 // is invalidated) avoid reporting oplog entries that have 127 // already been reported. 128 sel := append(s.query, 129 bson.DocElem{"ts", bson.D{{"$gte", fromTimestamp}}}, 130 bson.DocElem{"h", bson.D{{"$nin", excludeIds}}}, 131 ) 132 133 query := s.collection.Find(sel) 134 if isRealOplog(s.collection) { 135 // Apply an optimisation that is only supported with 136 // the real oplog. 137 query = query.LogReplay() 138 } 139 140 // Time the tail call out every second so that requests to 141 // stop can be honoured. 142 return query.Tail(oplogTailTimeout) 143 } 144 145 func (s *oplogSession) Close() { 146 s.session.Close() 147 } 148 149 // NewOplogTailer returns a new OplogTailer. 150 // 151 // Arguments: 152 // - "session" determines the collection and filtering on records that 153 // should be returned. 154 // - "initialTs" sets the operation timestamp to start returning 155 // results from. This can be used to avoid an expensive initial search 156 // through the oplog when the tailer first starts. 157 // 158 // Remember to call Stop on the returned OplogTailer when it is no 159 // longer needed. 160 func NewOplogTailer( 161 session OplogSession, 162 initialTs time.Time, 163 ) *OplogTailer { 164 t := &OplogTailer{ 165 session: session, 166 initialTs: NewMongoTimestamp(initialTs), 167 outCh: make(chan *OplogDoc), 168 } 169 t.tomb.Go(func() error { 170 defer func() { 171 close(t.outCh) 172 session.Close() 173 }() 174 return t.loop() 175 }) 176 return t 177 } 178 179 // OplogTailer tails MongoDB's replication oplog. 180 type OplogTailer struct { 181 tomb tomb.Tomb 182 session OplogSession 183 initialTs bson.MongoTimestamp 184 outCh chan *OplogDoc 185 } 186 187 // Out returns a channel that reports the oplog entries matching the 188 // query passed to NewOplogTailer as they appear. 189 func (t *OplogTailer) Out() <-chan *OplogDoc { 190 return t.outCh 191 } 192 193 // Dying returns a channel that will be closed with the OplogTailer is 194 // shutting down. 195 func (t *OplogTailer) Dying() <-chan struct{} { 196 return t.tomb.Dying() 197 } 198 199 // Stop shuts down the OplogTailer. It will block until shutdown is 200 // complete. 201 func (t *OplogTailer) Stop() error { 202 t.tomb.Kill(nil) 203 return t.tomb.Wait() 204 } 205 206 // Err returns the error that caused the OplogTailer to stop. If it 207 // finished normally or hasn't stopped then nil will be returned. 208 func (t *OplogTailer) Err() error { 209 return t.tomb.Err() 210 } 211 212 func (t *OplogTailer) loop() error { 213 // lastTimestamp tracks the most recent oplog timestamp reported. 214 lastTimestamp := t.initialTs 215 216 // idsForLastTimestamp records the unique operation ids that have 217 // been reported for the most recently reported oplog 218 // timestamp. This is used to avoid re-reporting oplog entries 219 // when the iterator is restarted. These timestamps are unique for 220 // a given mongod but when there's multiple replicaset members 221 // it's possible for there to be multiple oplog entries for a 222 // given timestamp. 223 // 224 // See: http://docs.mongodb.org/v2.4/reference/bson-types/#timestamps 225 var idsForLastTimestamp []int64 226 227 newIter := func() Iterator { 228 return t.session.NewIter(lastTimestamp, idsForLastTimestamp) 229 } 230 iter := newIter() 231 defer func() { iter.Close() }() // iter may be replaced, hence closure 232 233 for { 234 if t.dying() { 235 return tomb.ErrDying 236 } 237 var doc OplogDoc 238 if iter.Next(&doc) { 239 select { 240 case <-t.tomb.Dying(): 241 return tomb.ErrDying 242 case t.outCh <- &doc: 243 } 244 if doc.Timestamp > lastTimestamp { 245 lastTimestamp = doc.Timestamp 246 idsForLastTimestamp = nil 247 } 248 idsForLastTimestamp = append(idsForLastTimestamp, doc.OperationId) 249 } else { 250 if iter.Timeout() { 251 continue 252 } 253 if err := iter.Close(); err != nil && err != mgo.ErrCursor { 254 return err 255 } 256 // Either there's no error or the error is an expired 257 // cursor; Recreate the iterator. 258 iter = newIter() 259 } 260 } 261 } 262 263 func (t *OplogTailer) dying() bool { 264 select { 265 case <-t.tomb.Dying(): 266 return true 267 default: 268 return false 269 } 270 }