github.com/wallyworld/juju@v0.0.0-20161013125918-6cf1bc9d917a/mongo/oplog.go (about) 1 // Copyright 2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package mongo 5 6 import ( 7 "reflect" 8 "time" 9 10 "github.com/juju/errors" 11 "gopkg.in/mgo.v2" 12 "gopkg.in/mgo.v2/bson" 13 "gopkg.in/tomb.v1" 14 ) 15 16 // OplogDoc represents a document in the oplog.rs collection. 17 // See: http://www.kchodorow.com/blog/2010/10/12/replication-internals/ 18 // 19 // The Object and UpdateObject fields are returned raw to allow 20 // unmarshalling into arbitrary types. Use the UnmarshalObject and 21 // UnmarshalUpdate methods to unmarshall these fields. 22 type OplogDoc struct { 23 Timestamp bson.MongoTimestamp `bson:"ts"` 24 OperationId int64 `bson:"h"` 25 MongoVersion int `bson:"v"` 26 Operation string `bson:"op"` // "i" - insert, "u" - update, "d" - delete 27 Namespace string `bson:"ns"` 28 Object *bson.Raw `bson:"o"` 29 UpdateObject *bson.Raw `bson:"o2"` 30 } 31 32 // UnmarshalObject unmarshals the Object field into out. The out 33 // argument should be a pointer or a suitable map. 34 func (d *OplogDoc) UnmarshalObject(out interface{}) error { 35 return d.unmarshal(d.Object, out) 36 } 37 38 // UnmarshalUpdate unmarshals the UpdateObject field into out. The out 39 // argument should be a pointer or a suitable map. 40 func (d *OplogDoc) UnmarshalUpdate(out interface{}) error { 41 return d.unmarshal(d.UpdateObject, out) 42 } 43 44 func (d *OplogDoc) unmarshal(raw *bson.Raw, out interface{}) error { 45 if raw == nil { 46 // If the field is not set, set out to the zero value for its type. 47 v := reflect.ValueOf(out) 48 switch v.Kind() { 49 case reflect.Ptr: 50 v = v.Elem() 51 v.Set(reflect.Zero(v.Type())) 52 case reflect.Map: 53 // Empty the map. 54 for _, k := range v.MapKeys() { 55 v.SetMapIndex(k, reflect.Value{}) 56 } 57 default: 58 return errors.New("output must be a pointer or map") 59 } 60 return nil 61 } 62 return raw.Unmarshal(out) 63 } 64 65 // NewMongoTimestamp returns a bson.MongoTimestamp repesentation for 66 // the time.Time given. Note that these timestamps are not the same 67 // the usual MongoDB time fields. These are an internal format used 68 // only in a few places such as the replication oplog. 69 // 70 // See: http://docs.mongodb.org/manual/reference/bson-types/#timestamps 71 func NewMongoTimestamp(t time.Time) bson.MongoTimestamp { 72 unixTime := t.Unix() 73 if unixTime < 0 { 74 unixTime = 0 75 } 76 return bson.MongoTimestamp(unixTime << 32) 77 } 78 79 // GetOplog returns the the oplog collection in the local database. 80 func GetOplog(session *mgo.Session) *mgo.Collection { 81 return session.DB("local").C("oplog.rs") 82 } 83 84 func isRealOplog(c *mgo.Collection) bool { 85 return c.Database.Name == "local" && c.Name == "oplog.rs" 86 } 87 88 // OplogIterator defines the parts of the mgo.Iter that we use - this 89 // interface allows us to switch out the querying for testing. 90 type OplogIterator interface { 91 Next(interface{}) bool 92 Err() error 93 Timeout() bool 94 } 95 96 // OplogSession represents a connection to the oplog store, used 97 // to create an iterator to get oplog documents (and recreate it if it 98 // gets killed or times out). 99 type OplogSession interface { 100 NewIter(bson.MongoTimestamp, []int64) OplogIterator 101 Close() 102 } 103 104 type oplogSession struct { 105 session *mgo.Session 106 collection *mgo.Collection 107 query bson.D 108 } 109 110 // NewOplogSession defines a new OplogSession. 111 // 112 // Arguments: 113 // - "collection" is the collection to use for the oplog. Typically this 114 // would be the result of GetOpLog. 115 // - "query" can be used to limit the returned oplog entries. A 116 // typical filter would limit based on ns ("<database>.<collection>") 117 // and o (object). 118 // 119 // The returned session should be `Close`d when it's no longer needed. 120 func NewOplogSession(collection *mgo.Collection, query bson.D) *oplogSession { 121 // Use a fresh session for the tailer. 122 session := collection.Database.Session.Copy() 123 return &oplogSession{ 124 session: session, 125 collection: collection.With(session), 126 query: query, 127 } 128 } 129 130 const oplogTailTimeout = time.Second 131 132 func (s *oplogSession) NewIter(fromTimestamp bson.MongoTimestamp, excludeIds []int64) OplogIterator { 133 // When recreating the iterator (required when the cursor 134 // is invalidated) avoid reporting oplog entries that have 135 // already been reported. 136 sel := append(s.query, 137 bson.DocElem{"ts", bson.D{{"$gte", fromTimestamp}}}, 138 bson.DocElem{"h", bson.D{{"$nin", excludeIds}}}, 139 ) 140 141 query := s.collection.Find(sel) 142 if isRealOplog(s.collection) { 143 // Apply an optimisation that is only supported with 144 // the real oplog. 145 query = query.LogReplay() 146 } 147 148 // Time the tail call out every second so that requests to 149 // stop can be honoured. 150 return query.Tail(oplogTailTimeout) 151 } 152 153 func (s *oplogSession) Close() { 154 s.session.Close() 155 } 156 157 // NewOplogTailer returns a new OplogTailer. 158 // 159 // Arguments: 160 // - "session" determines the collection and filtering on records that 161 // should be returned. 162 // - "initialTs" sets the operation timestamp to start returning 163 // results from. This can be used to avoid an expensive initial search 164 // through the oplog when the tailer first starts. 165 // 166 // Remember to call Stop on the returned OplogTailer when it is no 167 // longer needed. 168 func NewOplogTailer( 169 session OplogSession, 170 initialTs time.Time, 171 ) *OplogTailer { 172 t := &OplogTailer{ 173 session: session, 174 initialTs: NewMongoTimestamp(initialTs), 175 outCh: make(chan *OplogDoc), 176 } 177 go func() { 178 defer func() { 179 close(t.outCh) 180 t.tomb.Done() 181 session.Close() 182 }() 183 t.tomb.Kill(t.loop()) 184 }() 185 return t 186 } 187 188 // OplogTailer tails MongoDB's replication oplog. 189 type OplogTailer struct { 190 tomb tomb.Tomb 191 session OplogSession 192 initialTs bson.MongoTimestamp 193 outCh chan *OplogDoc 194 } 195 196 // Out returns a channel that reports the oplog entries matching the 197 // query passed to NewOplogTailer as they appear. 198 func (t *OplogTailer) Out() <-chan *OplogDoc { 199 return t.outCh 200 } 201 202 // Dying returns a channel that will be closed with the OplogTailer is 203 // shutting down. 204 func (t *OplogTailer) Dying() <-chan struct{} { 205 return t.tomb.Dying() 206 } 207 208 // Stop shuts down the OplogTailer. It will block until shutdown is 209 // complete. 210 func (t *OplogTailer) Stop() error { 211 t.tomb.Kill(nil) 212 return t.tomb.Wait() 213 } 214 215 // Err returns the error that caused the OplogTailer to stop. If it 216 // finished normally or hasn't stopped then nil will be returned. 217 func (t *OplogTailer) Err() error { 218 return t.tomb.Err() 219 } 220 221 func (t *OplogTailer) loop() error { 222 var iter OplogIterator 223 224 // lastTimestamp tracks the most recent oplog timestamp reported. 225 lastTimestamp := t.initialTs 226 227 // idsForLastTimestamp records the unique operation ids that have 228 // been reported for the most recently reported oplog 229 // timestamp. This is used to avoid re-reporting oplog entries 230 // when the iterator is restarted. These timestamps are unique for 231 // a given mongod but when there's multiple replicaset members 232 // it's possible for there to be multiple oplog entries for a 233 // given timestamp. 234 // 235 // See: http://docs.mongodb.org/v2.4/reference/bson-types/#timestamps 236 var idsForLastTimestamp []int64 237 238 for { 239 if t.dying() { 240 return tomb.ErrDying 241 } 242 243 if iter == nil { 244 iter = t.session.NewIter(lastTimestamp, idsForLastTimestamp) 245 } 246 247 var doc OplogDoc 248 if iter.Next(&doc) { 249 select { 250 case <-t.tomb.Dying(): 251 return tomb.ErrDying 252 case t.outCh <- &doc: 253 } 254 255 if doc.Timestamp > lastTimestamp { 256 lastTimestamp = doc.Timestamp 257 idsForLastTimestamp = nil 258 } 259 idsForLastTimestamp = append(idsForLastTimestamp, doc.OperationId) 260 } else { 261 if err := iter.Err(); err != nil && err != mgo.ErrCursor { 262 return err 263 } 264 if iter.Timeout() { 265 continue 266 } 267 // Either there's no error or the error is an expired 268 // cursor. Force recreating the iterator next loop by 269 // marking it as nil. 270 iter = nil 271 } 272 } 273 } 274 275 func (t *OplogTailer) dying() bool { 276 select { 277 case <-t.tomb.Dying(): 278 return true 279 default: 280 return false 281 } 282 }