github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/state/watcher/txnwatcher.go (about) 1 // Copyright 2012, 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package watcher 5 6 import ( 7 "time" 8 9 "github.com/juju/errors" 10 "gopkg.in/juju/worker.v1" 11 "gopkg.in/mgo.v2" 12 "gopkg.in/mgo.v2/bson" 13 "gopkg.in/retry.v1" 14 "gopkg.in/tomb.v2" 15 16 "github.com/juju/juju/mongo" 17 ) 18 19 // Hub represents a pubsub hub. The TxnWatcher only ever publishes 20 // events to the hub. 21 type Hub interface { 22 Publish(topic string, data interface{}) <-chan struct{} 23 } 24 25 // Clock represents the time methods used. 26 type Clock interface { 27 Now() time.Time 28 After(time.Duration) <-chan time.Time 29 } 30 31 const ( 32 txnWatcherStarting = "starting" 33 txnWatcherSyncErr = "sync err" 34 txnWatcherCollection = "collection" 35 36 txnWatcherShortWait = 10 * time.Millisecond 37 ) 38 39 var ( 40 41 // PollStrategy is used to determine how long 42 // to delay between poll intervals. A new timer 43 // is created each time some watcher event is 44 // fired or if the old timer completes. 45 // 46 // It must not be changed when any watchers are active. 47 PollStrategy retry.Strategy = retry.Exponential{ 48 Initial: txnWatcherShortWait, 49 Factor: 1.5, 50 MaxDelay: 5 * time.Second, 51 } 52 53 // TxnPollNotifyFunc allows tests to be able to specify 54 // callbacks each time the database has been polled and processed. 55 TxnPollNotifyFunc func() 56 ) 57 58 type txnChange struct { 59 collection string 60 docID interface{} 61 revID int64 62 } 63 64 // A TxnWatcher watches the txns.log collection and publishes all change events 65 // to the hub. 66 type TxnWatcher struct { 67 hub Hub 68 clock Clock 69 logger Logger 70 71 tomb tomb.Tomb 72 iteratorFunc func() mongo.Iterator 73 log *mgo.Collection 74 75 // notifySync is copied from the package variable when the watcher 76 // is created. 77 notifySync func() 78 79 reportRequest chan chan map[string]interface{} 80 81 // syncEvents contain the events to be 82 // dispatched to the watcher channels. They're queued during 83 // processing and flushed at the end to simplify the algorithm. 84 // The two queues are separated because events from sync are 85 // handled in reverse order due to the way the algorithm works. 86 syncEvents []Change 87 88 // iteratorStepCount tracks how many documents we've read from the database 89 iteratorStepCount uint64 90 91 // changesCount tracks all sync events that we have processed 92 changesCount uint64 93 94 // syncEventsLastLen was the length of syncEvents when we did our last flush() 95 syncEventsLastLen int 96 97 // averageSyncLen tracks a filtered average of how long the sync event queue gets before we flush 98 averageSyncLen float64 99 100 // lastId is the most recent transaction id observed by a sync. 101 lastId interface{} 102 } 103 104 // TxnWatcherConfig contains the configuration parameters required 105 // for a NewTxnWatcher. 106 type TxnWatcherConfig struct { 107 // ChangeLog is usually the tnxs.log collection. 108 ChangeLog *mgo.Collection 109 // Hub is where the changes are published to. 110 Hub Hub 111 // Clock allows tests to control the advancing of time. 112 Clock Clock 113 // Logger is used to control where the log messages for this watcher go. 114 Logger Logger 115 // IteratorFunc can be overridden in tests to control what values the 116 // watcher sees. 117 IteratorFunc func() mongo.Iterator 118 } 119 120 // Validate ensures that all the values that have to be set are set. 121 func (config TxnWatcherConfig) Validate() error { 122 if config.ChangeLog == nil { 123 return errors.NotValidf("missing ChangeLog") 124 } 125 if config.Hub == nil { 126 return errors.NotValidf("missing Hub") 127 } 128 if config.Clock == nil { 129 return errors.NotValidf("missing Clock") 130 } 131 return nil 132 } 133 134 // New returns a new Watcher observing the changelog collection, 135 // which must be a capped collection maintained by mgo/txn. 136 func NewTxnWatcher(config TxnWatcherConfig) (*TxnWatcher, error) { 137 if err := config.Validate(); err != nil { 138 return nil, errors.Annotate(err, "new TxnWatcher invalid config") 139 } 140 141 w := &TxnWatcher{ 142 hub: config.Hub, 143 clock: config.Clock, 144 logger: config.Logger, 145 log: config.ChangeLog, 146 iteratorFunc: config.IteratorFunc, 147 notifySync: TxnPollNotifyFunc, 148 reportRequest: make(chan chan map[string]interface{}), 149 } 150 if w.iteratorFunc == nil { 151 w.iteratorFunc = w.iter 152 } 153 if w.logger == nil { 154 w.logger = noOpLogger{} 155 } 156 w.tomb.Go(func() error { 157 err := w.loop() 158 cause := errors.Cause(err) 159 // tomb expects ErrDying or ErrStillAlive as 160 // exact values, so we need to log and unwrap 161 // the error first. 162 if err != nil && cause != tomb.ErrDying { 163 w.logger.Infof("watcher loop failed: %v", err) 164 } 165 return cause 166 }) 167 return w, nil 168 } 169 170 // Kill is part of the worker.Worker interface. 171 func (w *TxnWatcher) Kill() { 172 w.tomb.Kill(nil) 173 } 174 175 // Wait is part of the worker.Worker interface. 176 func (w *TxnWatcher) Wait() error { 177 return w.tomb.Wait() 178 } 179 180 // Stop stops all the watcher activities. 181 func (w *TxnWatcher) Stop() error { 182 return worker.Stop(w) 183 } 184 185 // Dead returns a channel that is closed when the watcher has stopped. 186 func (w *TxnWatcher) Dead() <-chan struct{} { 187 return w.tomb.Dead() 188 } 189 190 // Err returns the error with which the watcher stopped. 191 // It returns nil if the watcher stopped cleanly, tomb.ErrStillAlive 192 // if the watcher is still running properly, or the respective error 193 // if the watcher is terminating or has terminated with an error. 194 func (w *TxnWatcher) Err() error { 195 return w.tomb.Err() 196 } 197 198 // Report is part of the watcher/runner Reporting interface, to expose runtime details of the watcher. 199 func (w *TxnWatcher) Report() map[string]interface{} { 200 // TODO: (jam) do we need to synchronize with the loop? 201 resCh := make(chan map[string]interface{}) 202 select { 203 case <-w.tomb.Dying(): 204 return nil 205 case w.reportRequest <- resCh: 206 break 207 } 208 select { 209 case <-w.tomb.Dying(): 210 return nil 211 case res := <-resCh: 212 return res 213 } 214 } 215 216 // loop implements the main watcher loop. 217 // period is the delay between each sync. 218 func (w *TxnWatcher) loop() error { 219 w.logger.Tracef("loop started") 220 defer w.logger.Tracef("loop finished") 221 // Make sure we have read the last ID before telling people 222 // we have started. 223 if err := w.initLastId(); err != nil { 224 return errors.Trace(err) 225 } 226 // Also make sure we have prepared the timer before 227 // we tell people we've started. 228 now := w.clock.Now() 229 backoff := PollStrategy.NewTimer(now) 230 d, _ := backoff.NextSleep(now) 231 next := w.clock.After(d) 232 w.hub.Publish(txnWatcherStarting, nil) 233 for { 234 select { 235 case <-w.tomb.Dying(): 236 return errors.Trace(tomb.ErrDying) 237 case <-next: 238 d, ok := backoff.NextSleep(w.clock.Now()) 239 if !ok { 240 // This shouldn't happen, but be defensive. 241 backoff = PollStrategy.NewTimer(w.clock.Now()) 242 } 243 next = w.clock.After(d) 244 case resCh := <-w.reportRequest: 245 report := map[string]interface{}{ 246 // How long was sync-events in our last flush 247 "sync-events-last-len": w.syncEventsLastLen, 248 // How long is sync-events on average 249 "sync-events-avg": int(w.averageSyncLen + 0.5), 250 // How long is the queue right now? (probably should always be 0 if we are at this point in the loop) 251 "sync-events-len": len(w.syncEvents), 252 // How big is our buffer 253 "sync-events-cap": cap(w.syncEvents), 254 // How many events have we actually generated 255 "total-changes": w.changesCount, 256 // How many database records have we read. note: because we have to iterate until we get to lastId, 257 // this is often a bit bigger than total-sync-events 258 "iterator-step-count": w.iteratorStepCount, 259 } 260 select { 261 case <-w.tomb.Dying(): 262 return errors.Trace(tomb.ErrDying) 263 case resCh <- report: 264 } 265 // This doesn't indicate we need to perform a sync 266 continue 267 } 268 269 added, err := w.sync() 270 if err != nil { 271 w.hub.Publish(txnWatcherSyncErr, nil) 272 return errors.Trace(err) 273 } 274 w.flush() 275 if added { 276 // Something's happened, so reset the exponential backoff 277 // so we'll retry again quickly. 278 backoff = PollStrategy.NewTimer(w.clock.Now()) 279 next = w.clock.After(txnWatcherShortWait) 280 } else if w.notifySync != nil { 281 w.notifySync() 282 } 283 } 284 } 285 286 // flush sends all pending events to their respective channels. 287 func (w *TxnWatcher) flush() { 288 // refreshEvents are stored newest first. 289 for i := len(w.syncEvents) - 1; i >= 0; i-- { 290 e := w.syncEvents[i] 291 w.hub.Publish(txnWatcherCollection, e) 292 } 293 w.averageSyncLen = (filterFactor * float64(len(w.syncEvents))) + ((1.0 - filterFactor) * w.averageSyncLen) 294 w.syncEventsLastLen = len(w.syncEvents) 295 w.syncEvents = w.syncEvents[:0] 296 // TODO(jam): 2018-11-07 Consider if averageSyncLen << cap(syncEvents) we should reallocate the buffer, so that it 297 // doesn't grow to the size of the largest-ever change and never shrink 298 } 299 300 // initLastId reads the most recent changelog document and initializes 301 // lastId with it. This causes all history that precedes the creation 302 // of the watcher to be ignored. 303 func (w *TxnWatcher) initLastId() error { 304 var entry struct { 305 Id interface{} `bson:"_id"` 306 } 307 err := w.log.Find(nil).Sort("-$natural").One(&entry) 308 if err != nil && err != mgo.ErrNotFound { 309 return errors.Trace(err) 310 } 311 w.lastId = entry.Id 312 return nil 313 } 314 315 func (w *TxnWatcher) iter() mongo.Iterator { 316 return w.log.Find(nil).Batch(10).Sort("-$natural").Iter() 317 } 318 319 // sync updates the watcher knowledge from the database, and 320 // queues events to observing channels. 321 func (w *TxnWatcher) sync() (bool, error) { 322 w.logger.Tracef("txn watcher %p starting sync", w) 323 added := false 324 // Iterate through log events in reverse insertion order (newest first). 325 iter := w.iteratorFunc() 326 seen := make(map[watchKey]bool) 327 first := true 328 lastId := w.lastId 329 var entry bson.D 330 for iter.Next(&entry) { 331 w.iteratorStepCount++ 332 if len(entry) == 0 { 333 w.logger.Tracef("got empty changelog document") 334 } 335 id := entry[0] 336 if id.Name != "_id" { 337 w.logger.Warningf("watcher: _id field isn't first entry") 338 continue 339 } 340 if first { 341 w.lastId = id.Value 342 first = false 343 } 344 if id.Value == lastId { 345 break 346 } 347 w.logger.Tracef("%p step %d got changelog document: %#v", w, w.iteratorStepCount, entry) 348 for _, c := range entry[1:] { 349 // See txn's Runner.ChangeLog for the structure of log entries. 350 var d, r []interface{} 351 dr, _ := c.Value.(bson.D) 352 for _, item := range dr { 353 switch item.Name { 354 case "d": 355 d, _ = item.Value.([]interface{}) 356 case "r": 357 r, _ = item.Value.([]interface{}) 358 } 359 } 360 if len(d) == 0 || len(d) != len(r) { 361 w.logger.Warningf("changelog has invalid collection document: %#v", c) 362 continue 363 } 364 for i := len(d) - 1; i >= 0; i-- { 365 key := watchKey{c.Name, d[i]} 366 if seen[key] { 367 continue 368 } 369 seen[key] = true 370 revno, ok := r[i].(int64) 371 if !ok { 372 w.logger.Warningf("changelog has revno with type %T: %#v", r[i], r[i]) 373 continue 374 } 375 if revno < 0 { 376 revno = -1 377 } 378 w.syncEvents = append(w.syncEvents, Change{ 379 C: c.Name, 380 Id: d[i], 381 Revno: revno, 382 }) 383 w.changesCount++ 384 added = true 385 } 386 } 387 } 388 if err := iter.Close(); err != nil { 389 return false, errors.Annotate(err, "watcher iteration error") 390 } 391 return added, nil 392 }