github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/state/watcher/watcher.go (about) 1 // Copyright 2012, 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 // The watcher package provides an interface for observing changes 5 // to arbitrary MongoDB documents that are maintained via the 6 // mgo/txn transaction package. 7 package watcher 8 9 import ( 10 "fmt" 11 "strings" 12 "time" 13 14 "github.com/juju/errors" 15 "github.com/juju/loggo" 16 "gopkg.in/juju/worker.v1" 17 "gopkg.in/mgo.v2" 18 "gopkg.in/mgo.v2/bson" 19 "gopkg.in/tomb.v2" 20 21 "github.com/juju/juju/mongo" 22 jworker "github.com/juju/juju/worker" 23 ) 24 25 // BaseWatcher represents watch methods on the worker 26 // responsible for watching for database changes. 27 type BaseWatcher interface { 28 worker.Worker 29 30 Dead() <-chan struct{} 31 Err() error 32 33 // Watch will send events on the Change channel whenever the document you 34 // are watching is changed. Note that in order to not miss any changes, you 35 // should start Watching the document before you read the document. 36 // At this low level Watch layer, there will not be an initial event. 37 // Instead, Watch is synchronous, the Watch will not return until the 38 // watcher is registered. 39 // TODO(jam): 2019-01-31 Update Watch() to return an error rather now 40 // that it is synchronous 41 Watch(collection string, id interface{}, ch chan<- Change) 42 43 // WatchMulti is similar to Watch, it just allows you to watch a set of 44 // documents in the same collection in one request. Just like Watch, 45 // no event will be sent for documents that don't change. 46 WatchMulti(collection string, ids []interface{}, ch chan<- Change) error 47 48 // WatchCollection will give an event if any documents are modified/added/removed 49 // from the collection. 50 // TODO(jam): 2019-01-31 Update WatchCollection() to return an error rather now 51 // that it is synchronous 52 WatchCollection(collection string, ch chan<- Change) 53 54 // WatchCollectionWithFilter will give an event if any documents are modified/added/removed 55 // from the collection. Filter can be supplied to check if a given document 56 // should send an event. 57 // TODO(jam): 2019-01-31 Update WatchCollectionWithFilter() to return an error rather now 58 // that it is synchronous 59 WatchCollectionWithFilter(collection string, ch chan<- Change, filter func(interface{}) bool) 60 61 // Unwatch is an asynchronous request to stop watching a given watch. 62 // It is an error to try to Unwatch something that is not being watched. 63 // Note that Unwatch can be called for things that have been registered with 64 // either Watch() or WatchMulti(). For WatchCollection or WatchCollectionWithFilter 65 // use UnwatchCollection. 66 // TODO(jam): 2019-01-31 Currently Unwatching something that isn't watched 67 // is a panic, should we make the method synchronous and turn it into an error? 68 // Or just turn it into a no-op 69 Unwatch(collection string, id interface{}, ch chan<- Change) 70 71 // UnwatchCollection is used when you are done with a watch started with 72 // either WatchCollection or WatchCollectionWithFilter. You must pass in 73 // the same Change channel. Unwatching a collection that isn't being watched 74 // is an error that will panic(). 75 UnwatchCollection(collection string, ch chan<- Change) 76 } 77 78 var logger = loggo.GetLogger("juju.state.watcher") 79 80 // A Watcher can watch any number of collections and documents for changes. 81 type Watcher struct { 82 tomb tomb.Tomb 83 iteratorFunc func() mongo.Iterator 84 log *mgo.Collection 85 86 // watches holds the observers managed by Watch/Unwatch. 87 watches map[watchKey][]watchInfo 88 89 // needSync is set when a synchronization should take 90 // place. 91 needSync bool 92 93 // syncEvents and requestEvents contain the events to be 94 // dispatched to the watcher channels. They're queued during 95 // processing and flushed at the end to simplify the algorithm. 96 // The two queues are separated because events from sync are 97 // handled in reverse order due to the way the algorithm works. 98 syncEvents, requestEvents []event 99 100 // request is used to deliver requests from the public API into 101 // the the goroutine loop. 102 request chan interface{} 103 104 // lastId is the most recent transaction id observed by a sync. 105 lastId interface{} 106 } 107 108 // A Change holds information about a document change. 109 type Change struct { 110 // C and Id hold the collection name and document _id field value. 111 C string 112 Id interface{} 113 114 // Revno is the latest known value for the document's txn-revno 115 // field, or -1 if the document was deleted. 116 Revno int64 117 } 118 119 type watchKey struct { 120 c string 121 id interface{} // nil when watching collection 122 } 123 124 func (k watchKey) String() string { 125 coll := fmt.Sprintf("collection %q", k.c) 126 if k.id == nil { 127 return coll 128 } 129 if s, ok := k.id.(string); ok { 130 return fmt.Sprintf("document %q in %s", s, coll) 131 } 132 return fmt.Sprintf("document %v in %s", k.id, coll) 133 } 134 135 // match returns whether the receiving watch key, 136 // which may refer to a particular item or 137 // an entire collection, matches k1, which refers 138 // to a particular item. 139 func (k watchKey) match(k1 watchKey) bool { 140 if k.c != k1.c { 141 return false 142 } 143 if k.id == nil { 144 // k refers to entire collection 145 return true 146 } 147 return k.id == k1.id 148 } 149 150 type watchInfo struct { 151 ch chan<- Change 152 revno int64 153 filter func(interface{}) bool 154 } 155 156 type event struct { 157 ch chan<- Change 158 key watchKey 159 isDeleted bool 160 revno int64 161 } 162 163 // Period is the delay between each sync. 164 // It must not be changed when any watchers are active. 165 var Period time.Duration = 5 * time.Second 166 167 // New returns a new Watcher observing the changelog collection, 168 // which must be a capped collection maintained by mgo/txn. 169 func New(changelog *mgo.Collection) *Watcher { 170 return newWatcher(changelog, nil) 171 } 172 173 func newWatcher(changelog *mgo.Collection, iteratorFunc func() mongo.Iterator) *Watcher { 174 w := &Watcher{ 175 log: changelog, 176 iteratorFunc: iteratorFunc, 177 watches: make(map[watchKey][]watchInfo), 178 request: make(chan interface{}), 179 } 180 if w.iteratorFunc == nil { 181 w.iteratorFunc = w.iter 182 } 183 w.tomb.Go(func() error { 184 err := w.loop(Period) 185 cause := errors.Cause(err) 186 // tomb expects ErrDying or ErrStillAlive as 187 // exact values, so we need to log and unwrap 188 // the error first. 189 if err != nil && cause != tomb.ErrDying { 190 logger.Infof("watcher loop failed: %v", err) 191 } 192 return cause 193 }) 194 return w 195 } 196 197 // NewDead returns a new watcher that is already dead 198 // and always returns the given error from its Err method. 199 func NewDead(err error) *Watcher { 200 var w Watcher 201 w.tomb.Kill(errors.Trace(err)) 202 return &w 203 } 204 205 // Kill is part of the worker.Worker interface. 206 func (w *Watcher) Kill() { 207 w.tomb.Kill(nil) 208 } 209 210 // Wait is part of the worker.Worker interface. 211 func (w *Watcher) Wait() error { 212 return w.tomb.Wait() 213 } 214 215 // Stop stops all the watcher activities. 216 func (w *Watcher) Stop() error { 217 return worker.Stop(w) 218 } 219 220 // Dead returns a channel that is closed when the watcher has stopped. 221 func (w *Watcher) Dead() <-chan struct{} { 222 return w.tomb.Dead() 223 } 224 225 // Err returns the error with which the watcher stopped. 226 // It returns nil if the watcher stopped cleanly, tomb.ErrStillAlive 227 // if the watcher is still running properly, or the respective error 228 // if the watcher is terminating or has terminated with an error. 229 func (w *Watcher) Err() error { 230 return w.tomb.Err() 231 } 232 233 type reqWatch struct { 234 key watchKey 235 info watchInfo 236 // registeredCh is used to indicate when 237 registeredCh chan error 238 } 239 240 func (r reqWatch) Completed() chan error { 241 return r.registeredCh 242 } 243 244 type reqWatchMulti struct { 245 collection string 246 ids []interface{} 247 completedCh chan error 248 watchCh chan<- Change 249 } 250 251 func (r reqWatchMulti) Completed() chan error { 252 return r.completedCh 253 } 254 255 type reqUnwatch struct { 256 key watchKey 257 ch chan<- Change 258 } 259 260 type reqSync struct{} 261 262 // waitableRequest represents a request that is made, and you wait for the core loop to acknowledge the request has been 263 // received 264 type waitableRequest interface { 265 // Completed returns the channel that the core loop will use to signal completion of the request. 266 Completed() chan error 267 } 268 269 func (w *Watcher) sendReq(req interface{}) { 270 select { 271 case w.request <- req: 272 case <-w.tomb.Dying(): 273 } 274 } 275 276 func (w *Watcher) sendAndWaitReq(req waitableRequest) error { 277 select { 278 case w.request <- req: 279 case <-w.tomb.Dying(): 280 return errors.Trace(tomb.ErrDying) 281 } 282 completed := req.Completed() 283 select { 284 case err := <-completed: 285 return errors.Trace(err) 286 case <-w.tomb.Dying(): 287 return errors.Trace(tomb.ErrDying) 288 } 289 } 290 291 // Watchstarts watching the given collection and document id. 292 // An event will be sent onto ch whenever a matching document's txn-revno 293 // field is observed to change after a transaction is applied. 294 func (w *Watcher) Watch(collection string, id interface{}, ch chan<- Change) { 295 if id == nil { 296 panic("watcher: cannot watch a document with nil id") 297 } 298 w.sendAndWaitReq(reqWatch{ 299 key: watchKey{collection, id}, 300 info: watchInfo{ch, -2, nil}, 301 registeredCh: make(chan error), 302 }) 303 } 304 305 func (w *Watcher) WatchMulti(collection string, ids []interface{}, ch chan<- Change) error { 306 for _, id := range ids { 307 if id == nil { 308 return errors.Errorf("cannot watch a document with nil id") 309 } 310 } 311 req := reqWatchMulti{ 312 collection: collection, 313 ids: ids, 314 watchCh: ch, 315 completedCh: make(chan error), 316 } 317 w.sendReq(req) 318 select { 319 case err := <-req.completedCh: 320 return errors.Trace(err) 321 case <-w.tomb.Dying(): 322 return errors.Trace(tomb.ErrDying) 323 } 324 } 325 326 // WatchCollection starts watching the given collection. 327 // An event will be sent onto ch whenever the txn-revno field is observed 328 // to change after a transaction is applied for any document in the collection. 329 func (w *Watcher) WatchCollection(collection string, ch chan<- Change) { 330 w.WatchCollectionWithFilter(collection, ch, nil) 331 } 332 333 // WatchCollectionWithFilter starts watching the given collection. 334 // An event will be sent onto ch whenever the txn-revno field is observed 335 // to change after a transaction is applied for any document in the collection, so long as the 336 // specified filter function returns true when called with the document id value. 337 func (w *Watcher) WatchCollectionWithFilter(collection string, ch chan<- Change, filter func(interface{}) bool) { 338 w.sendAndWaitReq(reqWatch{ 339 key: watchKey{collection, nil}, 340 info: watchInfo{ch, 0, filter}, 341 registeredCh: make(chan error), 342 }) 343 } 344 345 // Unwatch stops watching the given collection and document id via ch. 346 func (w *Watcher) Unwatch(collection string, id interface{}, ch chan<- Change) { 347 if id == nil { 348 panic("watcher: cannot unwatch a document with nil id") 349 } 350 w.sendReq(reqUnwatch{watchKey{collection, id}, ch}) 351 } 352 353 // UnwatchCollection stops watching the given collection via ch. 354 func (w *Watcher) UnwatchCollection(collection string, ch chan<- Change) { 355 w.sendReq(reqUnwatch{watchKey{collection, nil}, ch}) 356 } 357 358 // StartSync forces the watcher to load new events from the database. 359 func (w *Watcher) StartSync() { 360 w.sendReq(reqSync{}) 361 } 362 363 // loop implements the main watcher loop. 364 // period is the delay between each sync. 365 func (w *Watcher) loop(period time.Duration) error { 366 next := time.After(period) 367 w.needSync = true 368 if err := w.initLastId(); err != nil { 369 return errors.Trace(err) 370 } 371 for { 372 if w.needSync { 373 if err := w.sync(); err != nil { 374 // If the txn log collection overflows from underneath us, 375 // the easiest cause of action to recover is to cause the 376 // agen tto restart. 377 if errors.Cause(err) == cappedPositionLostError { 378 // Ideally we'd not import the worker package but that's 379 // where all the errors are defined. 380 return jworker.ErrRestartAgent 381 } 382 return errors.Trace(err) 383 } 384 w.flush() 385 next = time.After(period) 386 } 387 select { 388 case <-w.tomb.Dying(): 389 return errors.Trace(tomb.ErrDying) 390 case <-next: 391 next = time.After(period) 392 w.needSync = true 393 case req := <-w.request: 394 w.handle(req) 395 w.flush() 396 } 397 } 398 } 399 400 // flush sends all pending events to their respective channels. 401 func (w *Watcher) flush() { 402 // refreshEvents are stored newest first. 403 for i := len(w.syncEvents) - 1; i >= 0; i-- { 404 e := &w.syncEvents[i] 405 for e.ch != nil { 406 change := Change{ 407 C: e.key.c, 408 Id: e.key.id, 409 Revno: e.revno, 410 } 411 select { 412 case <-w.tomb.Dying(): 413 return 414 case req := <-w.request: 415 w.handle(req) 416 continue 417 case e.ch <- change: 418 } 419 break 420 } 421 } 422 // requestEvents are stored oldest first, and 423 // may grow during the loop. 424 for i := 0; i < len(w.requestEvents); i++ { 425 e := &w.requestEvents[i] 426 for e.ch != nil { 427 change := Change{ 428 C: e.key.c, 429 Id: e.key.id, 430 Revno: e.revno, 431 } 432 select { 433 case <-w.tomb.Dying(): 434 return 435 case req := <-w.request: 436 w.handle(req) 437 continue 438 case e.ch <- change: 439 } 440 break 441 } 442 } 443 w.syncEvents = w.syncEvents[:0] 444 w.requestEvents = w.requestEvents[:0] 445 } 446 447 // handle deals with requests delivered by the public API 448 // onto the background watcher goroutine. 449 func (w *Watcher) handle(req interface{}) { 450 logger.Tracef("got request: %#v", req) 451 switch r := req.(type) { 452 case reqSync: 453 w.needSync = true 454 case reqWatch: 455 for _, info := range w.watches[r.key] { 456 if info.ch == r.info.ch { 457 panic(fmt.Errorf("tried to re-add channel %v for %s", info.ch, r.key)) 458 } 459 } 460 w.watches[r.key] = append(w.watches[r.key], r.info) 461 if r.registeredCh != nil { 462 select { 463 case r.registeredCh <- nil: 464 case <-w.tomb.Dying(): 465 } 466 } 467 case reqUnwatch: 468 watches := w.watches[r.key] 469 removed := false 470 for i, info := range watches { 471 if info.ch == r.ch { 472 watches[i] = watches[len(watches)-1] 473 w.watches[r.key] = watches[:len(watches)-1] 474 removed = true 475 break 476 } 477 } 478 if !removed { 479 panic(fmt.Errorf("tried to remove missing channel %v for %s", r.ch, r.key)) 480 } 481 for i := range w.requestEvents { 482 e := &w.requestEvents[i] 483 if r.key.match(e.key) && e.ch == r.ch { 484 e.ch = nil 485 } 486 } 487 for i := range w.syncEvents { 488 e := &w.syncEvents[i] 489 if r.key.match(e.key) && e.ch == r.ch { 490 e.ch = nil 491 } 492 } 493 case reqWatchMulti: 494 for _, id := range r.ids { 495 key := watchKey{c: r.collection, id: id} 496 for _, info := range w.watches[key] { 497 if info.ch == r.watchCh { 498 err := errors.Errorf("tried to re-add channel %v for %s", info.ch, key) 499 select { 500 case r.completedCh <- err: 501 case <-w.tomb.Dying(): 502 } 503 return 504 } 505 } 506 } 507 for _, id := range r.ids { 508 key := watchKey{c: r.collection, id: id} 509 w.watches[key] = append(w.watches[key], watchInfo{ch: r.watchCh, revno: -2, filter: nil}) 510 } 511 select { 512 case r.completedCh <- nil: 513 case <-w.tomb.Dying(): 514 } 515 default: 516 panic(fmt.Errorf("unknown request: %T", req)) 517 } 518 } 519 520 // initLastId reads the most recent changelog document and initializes 521 // lastId with it. This causes all history that precedes the creation 522 // of the watcher to be ignored. 523 func (w *Watcher) initLastId() error { 524 var entry struct { 525 Id interface{} `bson:"_id"` 526 } 527 err := w.log.Find(nil).Sort("-$natural").One(&entry) 528 if err != nil && err != mgo.ErrNotFound { 529 return errors.Trace(err) 530 } 531 w.lastId = entry.Id 532 return nil 533 } 534 535 func (w *Watcher) iter() mongo.Iterator { 536 return w.log.Find(nil).Batch(10).Sort("-$natural").Iter() 537 } 538 539 var cappedPositionLostError = errors.New("capped position lost") 540 541 // sync updates the watcher knowledge from the database, and 542 // queues events to observing channels. 543 func (w *Watcher) sync() error { 544 w.needSync = false 545 // Iterate through log events in reverse insertion order (newest first). 546 iter := w.iteratorFunc() 547 seen := make(map[watchKey]bool) 548 first := true 549 lastId := w.lastId 550 var entry bson.D 551 for iter.Next(&entry) { 552 if len(entry) == 0 { 553 logger.Tracef("got empty changelog document") 554 } 555 id := entry[0] 556 if id.Name != "_id" { 557 panic("watcher: _id field isn't first entry") 558 } 559 if first { 560 w.lastId = id.Value 561 first = false 562 } 563 if id.Value == lastId { 564 break 565 } 566 logger.Tracef("got changelog document: %#v", entry) 567 for _, c := range entry[1:] { 568 // See txn's Runner.ChangeLog for the structure of log entries. 569 var d, r []interface{} 570 dr, _ := c.Value.(bson.D) 571 for _, item := range dr { 572 switch item.Name { 573 case "d": 574 d, _ = item.Value.([]interface{}) 575 case "r": 576 r, _ = item.Value.([]interface{}) 577 } 578 } 579 if len(d) == 0 || len(d) != len(r) { 580 logger.Warningf("changelog has invalid collection document: %#v", c) 581 continue 582 } 583 for i := len(d) - 1; i >= 0; i-- { 584 key := watchKey{c.Name, d[i]} 585 if seen[key] { 586 continue 587 } 588 seen[key] = true 589 revno, ok := r[i].(int64) 590 if !ok { 591 logger.Warningf("changelog has revno with type %T: %#v", r[i], r[i]) 592 continue 593 } 594 if revno < 0 { 595 revno = -1 596 } 597 // Queue notifications for per-collection watches. 598 for _, info := range w.watches[watchKey{c.Name, nil}] { 599 if info.filter != nil && !info.filter(d[i]) { 600 continue 601 } 602 evt := event{ 603 ch: info.ch, 604 key: key, 605 isDeleted: revno == -1, 606 revno: revno, 607 } 608 w.syncEvents = append(w.syncEvents, evt) 609 } 610 // Queue notifications for per-document watches. 611 infos := w.watches[key] 612 for i, info := range infos { 613 if revno > info.revno || revno < 0 && info.revno >= 0 { 614 infos[i].revno = revno 615 evt := event{ 616 ch: info.ch, 617 key: key, 618 isDeleted: revno == -1, 619 revno: revno, 620 } 621 w.syncEvents = append(w.syncEvents, evt) 622 } 623 } 624 } 625 } 626 } 627 if err := iter.Close(); err != nil { 628 if qerr, ok := err.(*mgo.QueryError); ok { 629 // CappedPositionLost is code 136. 630 // Just in case that changes for some reason, we'll also check the error message. 631 if qerr.Code == 136 || strings.Contains(qerr.Message, "CappedPositionLost") { 632 logger.Warningf("watcher iterator failed due to txn log collection overflow") 633 err = cappedPositionLostError 634 } 635 } 636 return errors.Annotate(err, "watcher iteration error") 637 } 638 return nil 639 }