github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/state/watcher/hubwatcher.go (about) 1 // Copyright 2017 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package watcher 5 6 import ( 7 "fmt" 8 "reflect" 9 "time" 10 11 "github.com/juju/errors" 12 "gopkg.in/juju/worker.v1" 13 "gopkg.in/tomb.v2" 14 ) 15 16 var ( 17 // HubWatcherIdleFunc allows tets to be able to get callbacks 18 // when the hub watcher hasn't notified any watchers for a specified time. 19 HubWatcherIdleFunc func(string) 20 21 // HubWatcherIdleTime relates to how long the hub needs to wait 22 // having notified no watchers to be considered idle. 23 HubWatcherIdleTime = 50 * time.Millisecond 24 ) 25 26 // HubSource represents the listening aspects of the pubsub hub. 27 type HubSource interface { 28 SubscribeMatch(matcher func(string) bool, handler func(string, interface{})) func() 29 } 30 31 // filterFactor represents the portion of a first-order filter that is calculating a running average. 32 // This is the weight of the new inputs. A value of 0.5 would represent averaging over just 2 samples, 33 // 0.1 would be 10 samples, 0.01 corresponds to averaging over approximately 100 items. 34 const filterFactor = 0.01 35 36 // HubWatcher listens to events from the hub and passes them on to the registered 37 // watchers. 38 type HubWatcher struct { 39 hub HubSource 40 clock Clock 41 modelUUID string 42 idleFunc func(string) 43 logger Logger 44 45 tomb tomb.Tomb 46 47 // watches holds the observers managed by Watch/Unwatch. 48 watches map[watchKey][]watchInfo 49 50 // syncEvents contain the events to be 51 // dispatched to the watcher channels. They're queued during 52 // processing and flushed at the end to simplify the algorithm. 53 syncEvents []event 54 55 // request is used to deliver requests from the public API into 56 // the the goroutine loop. 57 request chan interface{} 58 59 // changes are events published to the hub. 60 changes chan Change 61 62 // lastSyncLen was the length of syncEvents in the last flush() 63 lastSyncLen int 64 65 // maxSyncLen is the longest we've seen syncEvents 66 maxSyncLen int 67 68 // averageSyncLen applies a first-order filter for every time we flush(), to give us an idea of how large, 69 // on average, our sync queue is 70 averageSyncLen float64 71 72 // syncEventDocCount is all sync events that we've ever processed for individual docs 73 syncEventDocCount uint64 74 75 // syncEventCollectionCount is all the sync events we've processed for collection watches 76 syncEventCollectionCount uint64 77 78 // requestCount is all requests that we've ever processed 79 requestCount uint64 80 81 // changeCount is the number of change events we've processed 82 changeCount uint64 83 84 // revnoMapBytes tracks how big our revnomap is in approximate bytes 85 revnoMapBytes uintptr 86 } 87 88 // HubWatcherConfig contains the configuration parameters required 89 // for a NewHubWatcher. 90 type HubWatcherConfig struct { 91 // Hub is the source of the events for the hub watcher. 92 Hub HubSource 93 // Clock allows tests to control the advancing of time. 94 Clock Clock 95 // ModelUUID refers to the model that this hub watcher is being 96 // started for. 97 ModelUUID string 98 // Logger is used to control where the log messages for this watcher go. 99 Logger Logger 100 } 101 102 // Validate ensures that all the values that have to be set are set. 103 func (config HubWatcherConfig) Validate() error { 104 if config.Hub == nil { 105 return errors.NotValidf("missing Hub") 106 } 107 if config.Clock == nil { 108 return errors.NotValidf("missing Clock") 109 } 110 if config.ModelUUID == "" { 111 return errors.NotValidf("missing Model UUID") 112 } 113 return nil 114 } 115 116 // NewHubWatcher returns a new watcher observing Change events published to the 117 // hub. 118 func NewHubWatcher(config HubWatcherConfig) (*HubWatcher, error) { 119 if err := config.Validate(); err != nil { 120 return nil, errors.Annotate(err, "new HubWatcher invalid config") 121 } 122 watcher, _ := newHubWatcher(config.Hub, config.Clock, config.ModelUUID, config.Logger) 123 return watcher, nil 124 } 125 126 func newHubWatcher(hub HubSource, clock Clock, modelUUID string, logger Logger) (*HubWatcher, <-chan struct{}) { 127 if logger == nil { 128 logger = noOpLogger{} 129 } 130 started := make(chan struct{}) 131 w := &HubWatcher{ 132 hub: hub, 133 clock: clock, 134 modelUUID: modelUUID, 135 idleFunc: HubWatcherIdleFunc, 136 logger: logger, 137 watches: make(map[watchKey][]watchInfo), 138 request: make(chan interface{}), 139 changes: make(chan Change), 140 } 141 w.tomb.Go(func() error { 142 unsub := hub.SubscribeMatch( 143 func(string) bool { return true }, w.receiveEvent, 144 ) 145 defer unsub() 146 close(started) 147 err := w.loop() 148 cause := errors.Cause(err) 149 // tomb expects ErrDying or ErrStillAlive as 150 // exact values, so we need to log and unwrap 151 // the error first. 152 if err != nil && cause != tomb.ErrDying { 153 logger.Infof("watcher loop failed: %v", err) 154 } 155 return cause 156 }) 157 return w, started 158 } 159 160 func (w *HubWatcher) receiveEvent(topic string, data interface{}) { 161 switch topic { 162 case txnWatcherStarting: 163 // We don't do anything on a start. 164 case txnWatcherSyncErr: 165 w.tomb.Kill(errors.New("txn watcher sync error")) 166 case txnWatcherCollection: 167 change, ok := data.(Change) 168 if !ok { 169 w.logger.Warningf("incoming event not a Change") 170 return 171 } 172 select { 173 case w.changes <- change: 174 case <-w.tomb.Dying(): 175 } 176 default: 177 w.logger.Warningf("programming error, unknown topic: %q", topic) 178 } 179 } 180 181 // Kill is part of the worker.Worker interface. 182 func (w *HubWatcher) Kill() { 183 w.tomb.Kill(nil) 184 } 185 186 // Wait is part of the worker.Worker interface. 187 func (w *HubWatcher) Wait() error { 188 return w.tomb.Wait() 189 } 190 191 // Stop stops all the watcher activities. 192 func (w *HubWatcher) Stop() error { 193 return worker.Stop(w) 194 } 195 196 // Dead returns a channel that is closed when the watcher has stopped. 197 func (w *HubWatcher) Dead() <-chan struct{} { 198 return w.tomb.Dead() 199 } 200 201 // Err returns the error with which the watcher stopped. 202 // It returns nil if the watcher stopped cleanly, tomb.ErrStillAlive 203 // if the watcher is still running properly, or the respective error 204 // if the watcher is terminating or has terminated with an error. 205 func (w *HubWatcher) Err() error { 206 return w.tomb.Err() 207 } 208 209 func (w *HubWatcher) sendReq(req interface{}) { 210 select { 211 case w.request <- req: 212 case <-w.tomb.Dying(): 213 } 214 } 215 216 func (w *HubWatcher) sendAndWaitReq(req waitableRequest) error { 217 select { 218 case w.request <- req: 219 case <-w.tomb.Dying(): 220 return errors.Trace(tomb.ErrDying) 221 } 222 completed := req.Completed() 223 select { 224 case err := <-completed: 225 return errors.Trace(err) 226 case <-w.tomb.Dying(): 227 return errors.Trace(tomb.ErrDying) 228 } 229 } 230 231 // WatchMulti watches a particular collection for several ids. 232 // The request is synchronous with the worker loop, so by the time the function returns, 233 // we guarantee that the watch is in place. If there is a mistake in the arguments (id is nil, 234 // channel is already watching a given id), an error will be returned and no watches will be added. 235 func (w *HubWatcher) WatchMulti(collection string, ids []interface{}, ch chan<- Change) error { 236 for _, id := range ids { 237 if id == nil { 238 return errors.Errorf("cannot watch a document with nil id") 239 } 240 } 241 req := reqWatchMulti{ 242 collection: collection, 243 ids: ids, 244 completedCh: make(chan error), 245 watchCh: ch, 246 } 247 return errors.Trace(w.sendAndWaitReq(req)) 248 } 249 250 // Watch starts watching the given collection and document id. 251 // An event will be sent onto ch whenever a matching document's txn-revno 252 // field is observed to change after a transaction is applied. 253 func (w *HubWatcher) Watch(collection string, id interface{}, ch chan<- Change) { 254 if id == nil { 255 panic("watcher: cannot watch a document with nil id") 256 } 257 // We use a value of -2 to indicate that we don't know the state of the document. 258 // -1 would indicate that we think the document is deleted (and won't trigger 259 // a change event if the document really is deleted). 260 w.sendAndWaitReq(reqWatch{ 261 key: watchKey{collection, id}, 262 info: watchInfo{ch, -2, nil}, 263 registeredCh: make(chan error), 264 }) 265 } 266 267 // WatchCollection starts watching the given collection. 268 // An event will be sent onto ch whenever the txn-revno field is observed 269 // to change after a transaction is applied for any document in the collection. 270 func (w *HubWatcher) WatchCollection(collection string, ch chan<- Change) { 271 w.WatchCollectionWithFilter(collection, ch, nil) 272 } 273 274 // WatchCollectionWithFilter starts watching the given collection. 275 // An event will be sent onto ch whenever the txn-revno field is observed 276 // to change after a transaction is applied for any document in the collection, so long as the 277 // specified filter function returns true when called with the document id value. 278 func (w *HubWatcher) WatchCollectionWithFilter(collection string, ch chan<- Change, filter func(interface{}) bool) { 279 w.sendAndWaitReq(reqWatch{ 280 key: watchKey{collection, nil}, 281 info: watchInfo{ch, 0, filter}, 282 registeredCh: make(chan error), 283 }) 284 } 285 286 // Unwatch stops watching the given collection and document id via ch. 287 func (w *HubWatcher) Unwatch(collection string, id interface{}, ch chan<- Change) { 288 if id == nil { 289 panic("watcher: cannot unwatch a document with nil id") 290 } 291 w.sendReq(reqUnwatch{watchKey{collection, id}, ch}) 292 } 293 294 // UnwatchCollection stops watching the given collection via ch. 295 func (w *HubWatcher) UnwatchCollection(collection string, ch chan<- Change) { 296 w.sendReq(reqUnwatch{watchKey{collection, nil}, ch}) 297 } 298 299 // HubWatcherStats defines a few metrics that the hub watcher tracks 300 type HubWatcherStats struct { 301 // WatchKeyCount is the number of keys being watched 302 WatchKeyCount int 303 // WatchCount is the number of watchers (keys can be watched by multiples) 304 WatchCount uint64 305 // SyncQueueCap is the maximum buffer size for synchronization events 306 SyncQueueCap int 307 // SyncQueueLen is the current number of events being queued 308 SyncQueueLen int 309 // SyncLastLen was the length of SyncQueue the last time we flushed 310 SyncLastLen int 311 // SyncAvgLen is a smoothed average of recent sync lengths 312 SyncAvgLen int 313 // SyncMaxLen was the longest we've seen SyncQueue when flushing 314 SyncMaxLen int 315 // SyncEventDocCount is the number of sync events we've generated for specific documents 316 SyncEventDocCount uint64 317 // SyncEventCollCount is the number of sync events we've generated for documents changed in collections 318 SyncEventCollCount uint64 319 // RequestCount is the number of requests (reqWatch/reqUnwatch, etc) that we've seen 320 RequestCount uint64 321 // ChangeCount is the number of changes we've processed 322 ChangeCount uint64 323 } 324 325 type reqStats struct { 326 ch chan<- HubWatcherStats 327 } 328 329 func (w *HubWatcher) Stats() HubWatcherStats { 330 ch := make(chan HubWatcherStats) 331 w.sendReq(reqStats{ch: ch}) 332 select { 333 case <-w.tomb.Dying(): 334 return HubWatcherStats{} 335 case stats := <-ch: 336 return stats 337 } 338 } 339 340 // Report conforms to the worker.Runner.Report interface for returning information about the active worker. 341 func (w *HubWatcher) Report() map[string]interface{} { 342 stats := w.Stats() 343 return map[string]interface{}{ 344 "watch-count": stats.WatchCount, 345 "watch-key-count": stats.WatchKeyCount, 346 "sync-queue-cap": stats.SyncQueueCap, 347 "sync-queue-len": stats.SyncQueueLen, 348 "sync-last-len": stats.SyncLastLen, 349 "sync-avg-len": stats.SyncAvgLen, 350 "sync-max-len": stats.SyncMaxLen, 351 "sync-event-doc-count": stats.SyncEventDocCount, 352 "sync-event-coll-count": stats.SyncEventCollCount, 353 "request-count": stats.RequestCount, 354 "change-count": stats.ChangeCount, 355 } 356 } 357 358 // loop implements the main watcher loop. 359 // period is the delay between each sync. 360 func (w *HubWatcher) loop() error { 361 w.logger.Tracef("%p loop started", w) 362 defer w.logger.Tracef("loop finished") 363 // idle is initially nil, and is only ever set if idleFunc 364 // has a value. 365 var idle <-chan time.Time 366 if w.idleFunc != nil { 367 w.logger.Tracef("%p set idle timeout to %s", w, HubWatcherIdleTime) 368 idle = w.clock.After(HubWatcherIdleTime) 369 } 370 for { 371 select { 372 case <-w.tomb.Dying(): 373 return errors.Trace(tomb.ErrDying) 374 case inChange := <-w.changes: 375 w.queueChange(inChange) 376 case req := <-w.request: 377 w.handle(req) 378 case <-idle: 379 w.logger.Tracef("%p notify %s idle", w, w.modelUUID) 380 w.idleFunc(w.modelUUID) 381 idle = w.clock.After(HubWatcherIdleTime) 382 } 383 for len(w.syncEvents) > 0 { 384 select { 385 case <-w.tomb.Dying(): 386 return errors.Trace(tomb.ErrDying) 387 default: 388 if w.flush() { 389 if w.idleFunc != nil { 390 w.logger.Tracef("%p set idle timeout to %s", w, HubWatcherIdleTime) 391 idle = w.clock.After(HubWatcherIdleTime) 392 } 393 } 394 } 395 } 396 } 397 } 398 399 func (w *HubWatcher) flush() bool { 400 watchersNotified := false 401 // syncEvents are stored first in first out. 402 // syncEvents may grow during the looping here if new 403 // watch events come in while we are notifying other watchers. 404 w.logger.Tracef("%p flushing syncEvents: len(%d) cap(%d)", w, len(w.syncEvents), cap(w.syncEvents)) 405 for i := 0; i < len(w.syncEvents); i++ { 406 // We need to reget the address value each time through the loop 407 // as the slice may be reallocated. 408 for e := &w.syncEvents[i]; e.ch != nil; e = &w.syncEvents[i] { 409 outChange := Change{ 410 C: e.key.c, 411 Id: e.key.id, 412 Revno: e.revno, 413 } 414 w.logger.Tracef("%p sending syncEvent(%d): e.ch=%v %v", w, i, e.ch, outChange) 415 select { 416 case <-w.tomb.Dying(): 417 return watchersNotified 418 case req := <-w.request: 419 w.handle(req) 420 continue 421 case inChange := <-w.changes: 422 w.queueChange(inChange) 423 continue 424 case e.ch <- outChange: 425 w.logger.Tracef("%p e.ch=%v has been notified %v", w, e.ch, outChange) 426 watchersNotified = true 427 } 428 break 429 } 430 } 431 w.lastSyncLen = len(w.syncEvents) 432 if w.lastSyncLen > w.maxSyncLen { 433 w.maxSyncLen = w.lastSyncLen 434 } 435 // first-order filter: https://en.wikipedia.org/wiki/Low-pass_filter#Discrete-time_realization 436 // This allows us to compute an "average" without having to actually track N samples. 437 w.averageSyncLen = (filterFactor * float64(w.lastSyncLen)) + ((1.0 - filterFactor) * w.averageSyncLen) 438 w.syncEvents = w.syncEvents[:0] 439 // TODO(jam): 2018-11-07 This would probably be a good time to wipe syncEvents if cap(syncEvents) is significantly 440 // larger than averageSyncLen. Consider something like "if cap(syncEventsLen) > 10*w.averageSyncLen". 441 // That means that we can shrink the buffer after an outlier, rather than requiring it to always be the longest 442 // it was ever needed. 443 w.logger.Tracef("%p syncEvents after flush: len(%d), cap(%d) avg(%.1f)", w, len(w.syncEvents), cap(w.syncEvents), w.averageSyncLen) 444 if cap(w.syncEvents) > 100 && float64(cap(w.syncEvents)) > 10.0*w.averageSyncLen { 445 w.logger.Debugf("syncEvents buffer being reset from peak size", cap(w.syncEvents)) 446 w.syncEvents = nil 447 } 448 449 return watchersNotified 450 } 451 452 // handle deals with requests delivered by the public API 453 // onto the background watcher goroutine. 454 func (w *HubWatcher) handle(req interface{}) { 455 w.logger.Tracef("%p got request: %#v", w, req) 456 w.requestCount++ 457 switch r := req.(type) { 458 case reqWatch: 459 // TODO(jam): 2019-01-30 Now that reqWatch has a registeredCh, we could have Watch return an error 460 // rather than panic() 461 for _, info := range w.watches[r.key] { 462 if info.ch == r.info.ch { 463 panic(fmt.Errorf("tried to re-add channel %v for %s", info.ch, r.key)) 464 } 465 } 466 w.watches[r.key] = append(w.watches[r.key], r.info) 467 if r.registeredCh != nil { 468 select { 469 case r.registeredCh <- nil: 470 case <-w.tomb.Dying(): 471 } 472 } 473 case reqWatchMulti: 474 for _, id := range r.ids { 475 key := watchKey{c: r.collection, id: id} 476 for _, info := range w.watches[key] { 477 if info.ch == r.watchCh { 478 err := errors.Errorf("tried to re-add channel %v for %s", r.watchCh, key) 479 select { 480 case r.completedCh <- err: 481 case <-w.tomb.Dying(): 482 } 483 return 484 } 485 } 486 } 487 for _, id := range r.ids { 488 key := watchKey{c: r.collection, id: id} 489 info := watchInfo{ 490 ch: r.watchCh, 491 revno: -2, 492 filter: nil, 493 } 494 w.watches[key] = append(w.watches[key], info) 495 } 496 select { 497 case r.completedCh <- nil: 498 case <-w.tomb.Dying(): 499 } 500 case reqUnwatch: 501 watches := w.watches[r.key] 502 removed := false 503 for i, info := range watches { 504 if info.ch == r.ch { 505 watches[i] = watches[len(watches)-1] 506 w.watches[r.key] = watches[:len(watches)-1] 507 removed = true 508 break 509 } 510 } 511 if !removed { 512 panic(fmt.Errorf("tried to remove missing channel %v for %s", r.ch, r.key)) 513 } 514 for i := range w.syncEvents { 515 e := &w.syncEvents[i] 516 if r.key.match(e.key) && e.ch == r.ch { 517 e.ch = nil 518 } 519 } 520 case reqStats: 521 var watchCount uint64 522 for _, watches := range w.watches { 523 watchCount += uint64(len(watches)) 524 } 525 stats := HubWatcherStats{ 526 ChangeCount: w.changeCount, 527 WatchKeyCount: len(w.watches), 528 WatchCount: watchCount, 529 SyncQueueCap: cap(w.syncEvents), 530 SyncQueueLen: len(w.syncEvents), 531 SyncLastLen: w.lastSyncLen, 532 SyncMaxLen: w.maxSyncLen, 533 SyncAvgLen: int(w.averageSyncLen + 0.5), 534 SyncEventCollCount: w.syncEventCollectionCount, 535 SyncEventDocCount: w.syncEventDocCount, 536 RequestCount: w.requestCount, 537 } 538 select { 539 case <-w.tomb.Dying(): 540 return 541 case r.ch <- stats: 542 } 543 default: 544 panic(fmt.Errorf("unknown request: %T", req)) 545 } 546 } 547 548 var ( 549 int64Size = reflect.TypeOf(int64(0)).Size() 550 strSize = reflect.TypeOf("").Size() 551 watchKeySize = reflect.TypeOf(watchKey{}).Size() 552 ) 553 554 func sizeInMap(key watchKey) uintptr { 555 // This includes the size of the int64 pointer that we target 556 // TODO: (jam) 2018-11-05 there would be ways to be more accurate about the sizes, 557 // but this is a rough approximation. It should handle the most common types that we know about, 558 // where the id is a string 559 size := int64Size 560 size += watchKeySize 561 size += strSize 562 size += uintptr(len(key.c)) 563 switch id := key.id.(type) { 564 case string: 565 size += strSize 566 size += uintptr(len(id)) 567 default: 568 size += reflect.TypeOf(id).Size() 569 } 570 return size 571 } 572 573 // queueChange queues up the change for the registered watchers. 574 func (w *HubWatcher) queueChange(change Change) { 575 w.changeCount++ 576 w.logger.Tracef("%p got change document: %#v", w, change) 577 key := watchKey{change.C, change.Id} 578 revno := change.Revno 579 580 // Queue notifications for per-collection watches. 581 for _, info := range w.watches[watchKey{change.C, nil}] { 582 if info.filter != nil && !info.filter(change.Id) { 583 continue 584 } 585 evt := event{ 586 ch: info.ch, 587 key: key, 588 revno: revno, 589 } 590 w.syncEvents = append(w.syncEvents, evt) 591 w.syncEventCollectionCount++ 592 w.logger.Tracef("%p adding event for collection %q watch %v, syncEvents: len(%d), cap(%d)", w, change.C, info.ch, len(w.syncEvents), cap(w.syncEvents)) 593 } 594 595 // Queue notifications for per-document watches. 596 infos := w.watches[key] 597 for i, info := range infos { 598 if revno > info.revno || revno < 0 && info.revno >= 0 { 599 infos[i].revno = revno 600 evt := event{ 601 ch: info.ch, 602 key: key, 603 revno: revno, 604 } 605 w.syncEvents = append(w.syncEvents, evt) 606 w.syncEventDocCount++ 607 w.logger.Tracef("%p adding event for %v watch %v, syncEvents: len(%d), cap(%d)", w, key, info.ch, len(w.syncEvents), cap(w.syncEvents)) 608 } 609 } 610 }