github.com/mhilton/juju-juju@v0.0.0-20150901100907-a94dd2c73455/mongo/oplog.go (about) 1 // Copyright 2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package mongo 5 6 import ( 7 "reflect" 8 "time" 9 10 "github.com/juju/errors" 11 "gopkg.in/mgo.v2" 12 "gopkg.in/mgo.v2/bson" 13 "launchpad.net/tomb" 14 ) 15 16 // OplogDoc represents a document in the oplog.rs collection. 17 // See: http://www.kchodorow.com/blog/2010/10/12/replication-internals/ 18 // 19 // The Object and UpdateObject fields are returned raw to allow 20 // unmarshalling into arbitrary types. Use the UnmarshalObject and 21 // UnmarshalUpdate methods to unmarshall these fields. 22 type OplogDoc struct { 23 Timestamp bson.MongoTimestamp `bson:"ts"` 24 OperationId int64 `bson:"h"` 25 MongoVersion int `bson:"v"` 26 Operation string `bson:"op"` // "i" - insert, "u" - update, "d" - delete 27 Namespace string `bson:"ns"` 28 Object *bson.Raw `bson:"o"` 29 UpdateObject *bson.Raw `bson:"o2"` 30 } 31 32 // UnmarshalObject unmarshals the Object field into out. The out 33 // argument should be a pointer or a suitable map. 34 func (d *OplogDoc) UnmarshalObject(out interface{}) error { 35 return d.unmarshal(d.Object, out) 36 } 37 38 // UnmarshalUpdate unmarshals the UpdateObject field into out. The out 39 // argument should be a pointer or a suitable map. 40 func (d *OplogDoc) UnmarshalUpdate(out interface{}) error { 41 return d.unmarshal(d.UpdateObject, out) 42 } 43 44 func (d *OplogDoc) unmarshal(raw *bson.Raw, out interface{}) error { 45 if raw == nil { 46 // If the field is not set, set out to the zero value for its type. 47 v := reflect.ValueOf(out) 48 switch v.Kind() { 49 case reflect.Ptr: 50 v = v.Elem() 51 v.Set(reflect.Zero(v.Type())) 52 case reflect.Map: 53 // Empty the map. 54 for _, k := range v.MapKeys() { 55 v.SetMapIndex(k, reflect.Value{}) 56 } 57 default: 58 return errors.New("output must be a pointer or map") 59 } 60 return nil 61 } 62 return raw.Unmarshal(out) 63 } 64 65 // NewMongoTimestamp returns a bson.MongoTimestamp repesentation for 66 // the time.Time given. Note that these timestamps are not the same 67 // the usual MongoDB time fields. These are an internal format used 68 // only in a few places such as the replication oplog. 69 // 70 // See: http://docs.mongodb.org/manual/reference/bson-types/#timestamps 71 func NewMongoTimestamp(t time.Time) bson.MongoTimestamp { 72 unixTime := t.Unix() 73 if unixTime < 0 { 74 unixTime = 0 75 } 76 return bson.MongoTimestamp(unixTime << 32) 77 } 78 79 // GetOplog returns the the oplog collection in the local database. 80 func GetOplog(session *mgo.Session) *mgo.Collection { 81 return session.DB("local").C("oplog.rs") 82 } 83 84 // NewOplogTailer returns a new OplogTailer. 85 // 86 // Arguments: 87 // - "oplog" is the collection to use for the oplog. Typically this 88 // would be the result of GetOpLog. 89 // - "query" can be used to limit the returned oplog entries. A 90 // typical filter would limit based on ns ("<database>.<collection>") 91 // and o (object). 92 // - "initialTs" sets the operation timestamp to start returning 93 // results from. This can be used to avoid an expensive initial search 94 // through the oplog when the tailer first starts. 95 // 96 // Remember to call Stop on the returned OplogTailer when it is no 97 // longer needed. 98 func NewOplogTailer( 99 oplog *mgo.Collection, 100 query bson.D, 101 initialTs time.Time, 102 ) *OplogTailer { 103 // Use a fresh session for the tailer. 104 session := oplog.Database.Session.Copy() 105 t := &OplogTailer{ 106 oplog: oplog.With(session), 107 query: query, 108 initialTs: NewMongoTimestamp(initialTs), 109 outCh: make(chan *OplogDoc), 110 } 111 go func() { 112 defer func() { 113 close(t.outCh) 114 t.tomb.Done() 115 session.Close() 116 }() 117 t.tomb.Kill(t.loop()) 118 }() 119 return t 120 } 121 122 // OplogTailer tails MongoDB's replication oplog. 123 type OplogTailer struct { 124 tomb tomb.Tomb 125 oplog *mgo.Collection 126 query bson.D 127 initialTs bson.MongoTimestamp 128 outCh chan *OplogDoc 129 } 130 131 // Out returns a channel that reports the oplog entries matching the 132 // query passed to NewOplogTailer as they appear. 133 func (t *OplogTailer) Out() <-chan *OplogDoc { 134 return t.outCh 135 } 136 137 // Dying returns a channel that will be closed with the OplogTailer is 138 // shutting down. 139 func (t *OplogTailer) Dying() <-chan struct{} { 140 return t.tomb.Dying() 141 } 142 143 // Stop shuts down the OplogTailer. It will block until shutdown is 144 // complete. 145 func (t *OplogTailer) Stop() error { 146 t.tomb.Kill(nil) 147 return t.tomb.Wait() 148 } 149 150 // Err returns the error that caused the OplogTailer to stop. If it 151 // finished normally or hasn't stopped then nil will be returned. 152 func (t *OplogTailer) Err() error { 153 return t.tomb.Err() 154 } 155 156 const oplogTailTimeout = time.Second 157 158 func (t *OplogTailer) loop() error { 159 var iter *mgo.Iter 160 161 // lastTimestamp tracks the most recent oplog timestamp reported. 162 lastTimestamp := t.initialTs 163 164 // idsForLastTimestamp records the unique operation ids that have 165 // been reported for the most recently reported oplog 166 // timestamp. This is used to avoid re-reporting oplog entries 167 // when the iterator is restarted. These timestamps are unique for 168 // a given mongod but when there's multiple replicaset members 169 // it's possible for there to be multiple oplog entries for a 170 // given timestamp. 171 // 172 // See: http://docs.mongodb.org/v2.4/reference/bson-types/#timestamps 173 var idsForLastTimestamp []int64 174 175 for { 176 if t.dying() { 177 return tomb.ErrDying 178 } 179 180 if iter == nil { 181 // When recreating the iterator (required when the cursor 182 // is invalidated) avoid reporting oplog entries that have 183 // already been reported. 184 sel := append(t.query, 185 bson.DocElem{"ts", bson.D{{"$gte", lastTimestamp}}}, 186 bson.DocElem{"h", bson.D{{"$nin", idsForLastTimestamp}}}, 187 ) 188 // Time the tail call out every second so that requests to 189 // stop can be honoured. 190 // 191 // TODO(mjs): Ideally -1 (no timeout) could be used here, 192 // with session.Close() being used to unblock Next() if 193 // the tailer should stop (these semantics are hinted at 194 // by the mgo docs). Unfortunately this can trigger 195 // panics. See: https://github.com/go-mgo/mgo/issues/121 196 query := t.oplog.Find(sel) 197 if isRealOplog(t.oplog) { 198 // Apply an optmisation that is only supported with 199 // the real oplog. 200 query = query.LogReplay() 201 } 202 iter = query.Tail(oplogTailTimeout) 203 } 204 205 var doc OplogDoc 206 if iter.Next(&doc) { 207 select { 208 case <-t.tomb.Dying(): 209 return tomb.ErrDying 210 case t.outCh <- &doc: 211 } 212 213 if doc.Timestamp > lastTimestamp { 214 lastTimestamp = doc.Timestamp 215 idsForLastTimestamp = nil 216 } 217 idsForLastTimestamp = append(idsForLastTimestamp, doc.OperationId) 218 } else { 219 if err := iter.Err(); err != nil && err != mgo.ErrCursor { 220 return err 221 } 222 if iter.Timeout() { 223 continue 224 } 225 // No timeout and no error so cursor must have 226 // expired. Force it to be recreated next loop by marking 227 // it as nil. 228 iter = nil 229 } 230 } 231 } 232 233 func isRealOplog(c *mgo.Collection) bool { 234 return c.Database.Name == "local" && c.Name == "oplog.rs" 235 } 236 237 func (t *OplogTailer) dying() bool { 238 select { 239 case <-t.tomb.Dying(): 240 return true 241 default: 242 return false 243 } 244 }