github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/state/presence/presence.go

// Copyright 2012, 2013 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

// The presence package implements an interface for observing liveness
// of arbitrary keys (agents, processes, etc) on top of MongoDB.
// The design works by periodically updating the database so that
// watchers can tell an arbitrary key is alive.
package presence

import (
	"fmt"
	"math/rand"
	"strconv"
	"sync"
	"time"

	"github.com/juju/errors"
	"github.com/juju/loggo"
	"gopkg.in/juju/names.v2"
	"gopkg.in/juju/worker.v1"
	"gopkg.in/mgo.v2"
	"gopkg.in/mgo.v2/bson"
	"gopkg.in/tomb.v2"
)

var logger = loggo.GetLogger("juju.state.presence")

// lookupBatchSize is how many Sequence => Being keys we'll look up at one time.
// In testing, we could do 50,000 entries in a single request without errors.
// This mostly prevents a single request from growing without bound.
// Going from 10 to 100 increased the throughput 2x; going to 1k was ~1.1x, and
// going to 10k was another 1.1x. 1000 seems a reasonable size.
const lookupBatchSize = 1000

// Agent shouldn't really live here -- it's not used in this package,
// and is implemented by a couple of state types for the convenience of
// the apiserver -- but one of the methods returns a concrete *Pinger,
// and that ties it down here quite effectively (until we want to take
// on the task of cleaning it up and promoting it to core, which might
// well never happen).
type Agent interface {
	AgentPresence() (bool, error)
	SetAgentPresence() (*Pinger, error)
	WaitAgentPresence(time.Duration) error
}

// docIDInt64 generates a globally unique id value
// where the model uuid is prefixed to the
// given int64 localID.
func docIDInt64(modelUUID string, localID int64) string {
	return modelUUID + ":" + strconv.FormatInt(localID, 10)
}

// docIDStr generates a globally unique id value
// where the model uuid is prefixed to the
// given string localID.
func docIDStr(modelUUID string, localID string) string {
	return modelUUID + ":" + localID
}

// The implementation works by assigning a unique sequence number to each
// pinger that is alive, and the pinger is then responsible for
// periodically updating the current time slot document with its
// sequence number so that watchers can tell it is alive.
//
// There is only one time slot document per time slot, per model. The
// internal implementation of the time slot document is as follows:
//
// {
//   "_id":   <model UUID>:<time slot>,
//   "slot":  <slot>,
//   "alive": { hex(<pinger seq> / 63) : (1 << (<pinger seq> % 63) | <others>) },
//   "dead":  { hex(<pinger seq> / 63) : (1 << (<pinger seq> % 63) | <others>) },
// }
//
// All pingers that have their sequence number under "alive" and not
// under "dead" are currently alive. This design enables implementing
// a ping with a single update operation, a kill with another operation,
// and obtaining liveness data with a single query that returns two
// documents (the last two time slots).
//
// A new pinger sequence is obtained every time a pinger starts by atomically
// incrementing a counter in a document in a helper collection. There is only
// one such document per model. That sequence number is then inserted
// into the beings collection to establish the mapping between pinger sequence
// and key.
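
// The following is an illustrative sketch, not part of the original package
// API: it shows how a pinger sequence number maps onto the "alive"/"dead"
// maps described above. A sequence is stored as a single bit inside an int64
// field whose key is the hexadecimal value of seq/63, mirroring the fieldKey
// and fieldBit computed by Pinger.prepare below.
func exampleSlotFieldForSeq(seq int64) (fieldKey string, fieldBit uint64) {
	fieldKey = fmt.Sprintf("%x", seq/63) // e.g. seq 130 -> "2"
	fieldBit = 1 << uint64(seq%63)       // e.g. seq 130 -> 1 << 4
	return fieldKey, fieldBit
}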

// psuedoRandomFactor defines an increasing chance that we will trigger an effect.
// Inspired by: http://dota2.gamepedia.com/Random_distribution
// The idea is that 'on average' we will trigger 5% of the time. However, that
// leaves a low but non-zero chance that we will *never* trigger, and a
// surprisingly high chance that we will trigger twice in a row.
// psuedoRandom increases the chance to trigger every time it does not trigger,
// ultimately making it mandatory that you will trigger, and giving the desirable
// average case that you will trigger while still giving some slop so that
// machines won't get into sync and trigger at the same time.
// psuedoRandomFactor of 0.00380 represents a 5% average chance to trigger.
const psuedoRandomFactor = 0.00380

// A Watcher can watch any number of pinger keys for liveness changes.
type Watcher struct {
	modelUUID string
	tomb      tomb.Tomb
	base      *mgo.Collection
	pings     *mgo.Collection
	beings    *mgo.Collection

	// delta is an approximate clock skew between the local system
	// clock and the database clock.
	delta time.Duration

	// beingKey and beingSeq are the pinger seq <=> key mappings.
	// Entries in these maps are considered alive.
	beingKey map[int64]string
	beingSeq map[string]int64

	// ignoredSeqs are sequences that are active pingers, but are superseded
	// by another pinger. We know who they represent, but there is a newer
	// being that applies. This is because some agents (like the Controller
	// agents) maintain multiple active connections.
	ignoredSeqs map[int64]string

	// watches has the per-key observer channels from Watch/Unwatch.
	watches map[string][]chan<- Change

	// pending contains all the events to be dispatched to the watcher
	// channels. They're queued during processing and flushed at the
	// end to simplify the algorithm.
	pending []event

	// request is used to deliver requests from the public API into
	// the goroutine loop.
	request chan interface{}

	// syncDone contains pending done channels from sync requests.
	syncDone []chan bool

	// next will dispatch when it's time to sync the database
	// knowledge. It's maintained here so that ForceRefresh
	// can manipulate it to force a sync sooner.
	next <-chan time.Time

	// syncsSinceLastPrune is a counter that tracks how long it has been
	// since we've run a prune on the Beings and Pings collections.
	syncsSinceLastPrune int

	// beingLoads tracks the number of beings that we've had to refresh from the database.
	beingLoads uint64
}

func (w *Watcher) String() string {
	return fmt.Sprintf("presence.Watcher(%s)", w.modelUUID)
}

type event struct {
	ch    chan<- Change
	key   string
	alive bool
}

// Change holds a liveness change notification.
type Change struct {
	Key   string
	Alive bool
}

// NewDeadWatcher returns a new watcher that is already dead
// and always returns the given error from its Err method.
func NewDeadWatcher(err error) *Watcher {
	var w Watcher
	w.tomb.Kill(errors.Trace(err))
	return &w
}

// NewWatcher returns a new Watcher.
func NewWatcher(base *mgo.Collection, modelTag names.ModelTag) *Watcher {
	w := &Watcher{
		modelUUID:   modelTag.Id(),
		base:        base,
		pings:       pingsC(base),
		beings:      beingsC(base),
		beingKey:    make(map[int64]string),
		beingSeq:    make(map[string]int64),
		watches:     make(map[string][]chan<- Change),
		request:     make(chan interface{}),
		ignoredSeqs: make(map[int64]string),
	}
	w.tomb.Go(func() error {
		err := w.loop()
		cause := errors.Cause(err)
		// tomb expects ErrDying or ErrStillAlive as
		// exact values, so we need to log and unwrap
		// the error first.
		if err != nil && cause != tomb.ErrDying {
			logger.Infof("watcher loop failed: %v", err)
		}
		return cause
	})
	return w
}

// Kill is part of the worker.Worker interface.
func (w *Watcher) Kill() {
	w.tomb.Kill(nil)
}

// Wait is part of the worker.Worker interface.
func (w *Watcher) Wait() error {
	return w.tomb.Wait()
}

// Stop stops all the watcher activities.
func (w *Watcher) Stop() error {
	return worker.Stop(w)
}

// Dead returns a channel that is closed when the watcher has stopped.
func (w *Watcher) Dead() <-chan struct{} {
	return w.tomb.Dead()
}

// Err returns the error with which the watcher stopped.
// It returns nil if the watcher stopped cleanly, tomb.ErrStillAlive
// if the watcher is still running properly, or the respective error
// if the watcher is terminating or has terminated with an error.
func (w *Watcher) Err() error {
	return w.tomb.Err()
}

type reqWatch struct {
	key string
	ch  chan<- Change
}

type reqUnwatch struct {
	key string
	ch  chan<- Change
}

type reqSync struct {
	done chan bool
}

type reqAlive struct {
	key    string
	result chan bool
}

func (w *Watcher) sendReq(req interface{}) {
	select {
	case w.request <- req:
	case <-w.tomb.Dying():
	}
}

// Watch starts watching the liveness of key. An event will
// be sent onto ch to report the initial status for the key, and
// from then on a new event will be sent whenever a change is
// detected. Change values sent to the channel must be consumed,
// or the whole watcher will be blocked.
func (w *Watcher) Watch(key string, ch chan<- Change) {
	w.sendReq(reqWatch{key, ch})
}

// Unwatch stops watching the liveness of key via ch.
func (w *Watcher) Unwatch(key string, ch chan<- Change) {
	w.sendReq(reqUnwatch{key, ch})
}

// StartSync forces the watcher to load new events from the database.
func (w *Watcher) StartSync() {
	w.sendReq(reqSync{nil})
}

// Sync forces the watcher to load new events from the database and blocks
// until all events have been dispatched.
func (w *Watcher) Sync() {
	done := make(chan bool)
	w.sendReq(reqSync{done})
	select {
	case <-done:
	case <-w.tomb.Dying():
	}
}
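
// The following is an illustrative sketch of how a caller might use the
// Watcher API above; it is not part of the original package. The key
// "machine-0" is hypothetical. Change values must be drained from the
// channel, or the watcher blocks.
func exampleWatchKey(base *mgo.Collection, modelTag names.ModelTag) {
	w := NewWatcher(base, modelTag)
	defer w.Stop()

	ch := make(chan Change)
	w.Watch("machine-0", ch) // hypothetical key
	go func() {
		for change := range ch {
			logger.Infof("key %q alive: %v", change.Key, change.Alive)
		}
	}()

	// Force an immediate sync instead of waiting for the next period.
	w.StartSync()
}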

// Alive returns whether the key is currently considered alive by w,
// or an error in case the watcher is dying.
func (w *Watcher) Alive(key string) (bool, error) {
	result := make(chan bool, 1)
	w.sendReq(reqAlive{key, result})
	var alive bool
	select {
	case alive = <-result:
	case <-w.tomb.Dying():
		return false, errors.Errorf("cannot check liveness: watcher is dying")
	}
	logger.Tracef("[%s] Alive(%q) -> %v", w.modelUUID[:6], key, alive)
	return alive, nil
}

// period is the length of each time slot in seconds.
// It's not a time.Duration because the code is more convenient like
// this and also because sub-second timings don't work as the slot
// identifier is an int64 in seconds.
var period int64 = 30

// loop implements the main watcher loop.
func (w *Watcher) loop() error {
	var err error
	if w.delta, err = clockDelta(w.base); err != nil {
		return errors.Trace(err)
	}
	// Always sync before handling requests.
	if err := w.sync(); err != nil {
		return errors.Trace(err)
	}
	w.next = time.After(time.Duration(period) * time.Second)
	for {
		select {
		case <-w.tomb.Dying():
			return errors.Trace(tomb.ErrDying)
		case <-w.next:
			w.next = time.After(time.Duration(period) * time.Second)
			syncDone := w.syncDone
			w.syncDone = nil
			if err := w.sync(); err != nil {
				return errors.Trace(err)
			}
			w.flush()
			for _, done := range syncDone {
				close(done)
			}
			w.syncsSinceLastPrune++
			w.checkShouldPrune()
		case req := <-w.request:
			w.handle(req)
			w.flush()
		}
	}
}

// flush sends all pending events to their respective channels.
func (w *Watcher) flush() {
	// w.pending may get new requests as we handle other requests.
	for i := 0; i < len(w.pending); i++ {
		e := &w.pending[i]
		// Allow the handling of requests while waiting for e.ch
		// to be ready to receive.
		for e.ch != nil {
			select {
			case <-w.tomb.Dying():
				return
			case req := <-w.request:
				w.handle(req)
				// handle may append to the w.pending array, and/or it may
				// unwatch something that was previously pending, thus changing
				// e.ch to nil while we are waiting to send the event. We need
				// to make sure we are using the correct 'e' object.
				// See TestRobustness, which fails if this line doesn't exist.
				e = &w.pending[i]
				continue
			case e.ch <- Change{e.key, e.alive}:
			}
			break
		}
	}
	w.pending = w.pending[:0]
}

// checkShouldPrune looks at whether we should run a prune step this time.
func (w *Watcher) checkShouldPrune() {
	chanceToPrune := float64(w.syncsSinceLastPrune) * psuedoRandomFactor
	if chanceToPrune < 1.0 && rand.Float64() > chanceToPrune {
		return
	}
	// When we decide to prune, we also drop our old cached beings.
	logger.Debugf("watcher %q decided to prune %q and %q", w.modelUUID, w.beings.Name, w.pings.Name)
	w.syncsSinceLastPrune = 0
	pruner := NewPruner(w.modelUUID, w.beings, w.pings, w.delta)
	err := pruner.Prune(w.ignoredSeqs)
	if err != nil {
		logger.Warningf("error while pruning %q for %q: %v", w.beings.Name, w.modelUUID, err)
	}
}
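
// exampleChanceToPrune is an illustrative sketch, not part of the original
// package: it restates the pseudo-random distribution used by checkShouldPrune.
// The chance of pruning grows linearly with the number of syncs since the last
// prune, so pruning becomes certain once enough syncs have elapsed (roughly
// 1/psuedoRandomFactor, i.e. about 264 syncs), while on average it triggers
// about every 20 syncs (~5%).
func exampleChanceToPrune(syncsSinceLastPrune int) bool {
	chance := float64(syncsSinceLastPrune) * psuedoRandomFactor
	return chance >= 1.0 || rand.Float64() <= chance
}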

// handle deals with requests delivered by the public API
// onto the background watcher goroutine.
func (w *Watcher) handle(req interface{}) {
	logger.Tracef("[%s] got request: %#v for model", w.modelUUID[:6], req)
	switch r := req.(type) {
	case reqSync:
		w.next = time.After(0)
		if r.done != nil {
			w.syncDone = append(w.syncDone, r.done)
		}
	case reqWatch:
		for _, ch := range w.watches[r.key] {
			if ch == r.ch {
				panic("adding channel twice for same key")
			}
		}
		w.watches[r.key] = append(w.watches[r.key], r.ch)
		_, alive := w.beingSeq[r.key]
		w.pending = append(w.pending, event{r.ch, r.key, alive})
	case reqUnwatch:
		watches := w.watches[r.key]
		for i, ch := range watches {
			if ch == r.ch {
				watches[i] = watches[len(watches)-1]
				w.watches[r.key] = watches[:len(watches)-1]
				break
			}
		}
		for i := range w.pending {
			e := &w.pending[i]
			if e.key == r.key && e.ch == r.ch {
				e.ch = nil
			}
		}
	case reqAlive:
		_, alive := w.beingSeq[r.key]
		r.result <- alive
	default:
		panic(fmt.Errorf("unknown request: %T", req))
	}
}

type beingInfo struct {
	DocID string `bson:"_id"`
	Seq   int64  `bson:"seq,omitempty"`
	Key   string `bson:"key,omitempty"`
}

type pingInfo struct {
	DocID string           `bson:"_id"`
	Slot  int64            `bson:"slot,omitempty"`
	Alive map[string]int64 `bson:",omitempty"`
	Dead  map[string]int64 `bson:",omitempty"`
}

func (w *Watcher) lookupPings(session *mgo.Session) ([]pingInfo, error) {
	pings := w.pings.With(session)
	return lookupPings(pings, w.modelUUID, time.Now(), w.delta)
}

func lookupPings(pings *mgo.Collection, modelUUID string, ts time.Time, delta time.Duration) ([]pingInfo, error) {
	// TODO(perrito666) 2016-05-02 lp:1558657
	s := timeSlot(ts, delta)
	slot := docIDInt64(modelUUID, s)
	previousSlot := docIDInt64(modelUUID, s-period)
	var ping []pingInfo
	q := bson.D{{"$or", []pingInfo{{DocID: slot}, {DocID: previousSlot}}}}
	err := pings.Find(q).All(&ping)
	if err != nil && err != mgo.ErrNotFound {
		return nil, errors.Trace(err)
	}
	return ping, nil
}

func (w *Watcher) lookForDead(pings []pingInfo) (map[int64]bool, error) {
	// Learn about all enforced deaths.
	// TODO(ericsnow) Remove this once KillForTesting() goes away.
	dead := make(map[int64]bool)
	deadSeqs, err := deadSeqs(pings)
	if err != nil {
		return nil, errors.Trace(err)
	}
	for _, seq := range deadSeqs {
		dead[seq] = true
		logger.Tracef("[%s] found seq=%d dead", w.modelUUID[:6], seq)
	}
	return dead, nil
}

// decompressPings looks at maps like 'Alive' and turns them into an array of
// sequences that were seen in those maps.
func decompressPings(maps []map[string]int64) ([]int64, error) {
	if len(maps) == 0 {
		return nil, nil
	}
	// First step, merge all value structures together.
	// Every ping has a bit field in an int64. However, bitwise-or preserves
	// everything that was ever alive without having to decode them multiple times.
	baseToBits := make(map[string]int64, len(maps[0]))
	for i := range maps {
		for hexbase, bits := range maps[i] {
			baseToBits[hexbase] |= bits
		}
	}
	sequences := make([]int64, 0, len(baseToBits)*30)
	for hexbase, bits := range baseToBits {
		base, err := strconv.ParseInt(hexbase, 16, 64)
		if err != nil {
			return nil, errors.Annotatef(err, "presence cannot parse alive key: %q", hexbase)
		}
		base *= 63
		for i := int64(0); i < 63 && bits > 0; i++ {
			on := (bits&1 == 1)
			bits >>= 1
			if !on {
				continue
			}
			seq := base + i
			sequences = append(sequences, seq)
		}
	}
	return sequences, nil
}

func aliveSeqs(pings []pingInfo) ([]int64, error) {
	maps := make([]map[string]int64, len(pings))
	for i := range pings {
		maps[i] = pings[i].Alive
	}
	return decompressPings(maps)
}

func deadSeqs(pings []pingInfo) ([]int64, error) {
	maps := make([]map[string]int64, len(pings))
	for i := range pings {
		maps[i] = pings[i].Dead
	}
	return decompressPings(maps)
}

func (w *Watcher) handleAlive(pings []pingInfo) (map[int64]bool, []int64, error) {
	// Learn about all the pingers that reported and queue
	// events for those that weren't known to be alive and
	// are not reportedly dead either.
	alive := make(map[int64]bool)
	unknownSeqs := make([]int64, 0)
	aliveSeq, err := aliveSeqs(pings)
	if err != nil {
		return nil, nil, errors.Trace(err)
	}
	for _, seq := range aliveSeq {
		alive[seq] = true
		if _, ok := w.beingKey[seq]; ok {
			// Entries in beingKey are ones we consider alive.
			// Since we already have this sequence, we consider
			// this being alive and this as the active sequence
			// for that being, so we don't need to do any more
			// work for this sequence.
			continue
		}
		if key, ok := w.ignoredSeqs[seq]; ok {
			// This is a known sequence that has been superseded.
			if _, ok := w.beingSeq[key]; ok {
				// The sequence that supersedes this one is still alive.
				continue
			} else {
				// What was a living pinger has now died, and this one might
				// be promoted to primary pinger. Treat it as unknown.
				delete(w.ignoredSeqs, seq)
			}
		}
		unknownSeqs = append(unknownSeqs, seq)
	}
	return alive, unknownSeqs, nil
}
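
// exampleDecompressPings is an illustrative sketch, not part of the original
// package: it demonstrates how the packed "alive" maps are decoded back into
// sequence numbers by decompressPings. Field "0" covers sequences 0..62,
// field "1" covers 63..125, and so on; each set bit marks one sequence.
func exampleDecompressPings() {
	packed := []map[string]int64{
		{"0": 1<<5 | 1<<7}, // sequences 5 and 7
		{"1": 1 << 0},      // sequence 63
	}
	seqs, err := decompressPings(packed)
	if err != nil {
		logger.Errorf("decompress failed: %v", err)
		return
	}
	logger.Infof("decoded sequences: %v", seqs) // 5, 7 and 63 (order depends on map iteration)
}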

// lookupUnknownSeqs handles finding new sequences that we weren't already tracking.
// Keys that turn out to be alive will have a 'found alive' event queued.
func (w *Watcher) lookupUnknownSeqs(unknownSeqs []int64, dead map[int64]bool, session *mgo.Session) error {
	if len(unknownSeqs) == 0 {
		// Nothing to do, with nothing unknown.
		return nil
	}
	// We do cache *all* beingInfos, but they're reasonably small.
	seqToBeing := make(map[int64]beingInfo, len(unknownSeqs))
	startTime := time.Now()
	beingsC := w.beings.With(session)
	remaining := unknownSeqs
	for len(remaining) > 0 {
		// Batch this into reasonable lengths. Testing shows that it works
		// just fine at 50,000 ids, but be a bit more conservative.
		batch := remaining
		if len(remaining) > lookupBatchSize {
			batch = remaining[:lookupBatchSize]
			remaining = remaining[lookupBatchSize:]
		} else {
			remaining = nil
		}
		docIds := make([]string, len(batch))
		for i, seq := range batch {
			docIds[i] = docIDInt64(w.modelUUID, seq)
		}
		query := beingsC.Find(bson.M{"_id": bson.M{"$in": docIds}})
		// We don't need the _id returned, as it's just a way to look up
		// the seq, and _id is quite large.
		query = query.Select(bson.M{"_id": false, "key": true, "seq": true})
		query.Batch(lookupBatchSize)
		beingIter := query.Iter()
		being := beingInfo{}
		for beingIter.Next(&being) {
			seqToBeing[being.Seq] = being
			w.beingLoads++
		}
		if err := beingIter.Close(); err != nil {
			if err != mgo.ErrNotFound {
				// This may be an old sequence, not considered fatal.
				return err
			}
		}
	}
	rate := ""
	elapsed := time.Since(startTime)
	if len(unknownSeqs) > 0 {
		seqPerMS := float64(len(unknownSeqs)) / (elapsed.Seconds() * 1000.0)
		rate = fmt.Sprintf(" (%.1fseq/ms)", seqPerMS)
	}
	unownedCount := 0
	for _, seq := range unknownSeqs {
		being, ok := seqToBeing[seq]
		if !ok {
			// Not found.
			unownedCount++
			logger.Tracef("[%s] found seq=%d unowned", w.modelUUID[:6], seq)
			continue
		}
		cur := w.beingSeq[being.Key]
		if cur < seq {
			delete(w.beingKey, cur)
			if cur > 0 {
				w.ignoredSeqs[cur] = being.Key
			}
		} else {
			// We already have a sequence for this key, and it is
			// newer than the one we just saw.
			w.ignoredSeqs[seq] = being.Key
			continue
		}
		// Start tracking the new sequence for this key.
		w.beingKey[seq] = being.Key
		w.beingSeq[being.Key] = seq
		if cur > 0 || dead[seq] {
			// If cur > 0, then we already think this is alive; no
			// need to queue another message.
			// If dead[seq], then we still wouldn't queue an alive message,
			// because we are writing an 'is dead' message.
			continue
		}
		logger.Tracef("[%s] found seq=%d alive with key %q", w.modelUUID[:6], seq, being.Key)
		for _, ch := range w.watches[being.Key] {
			w.pending = append(w.pending, event{ch, being.Key, true})
		}
	}
	logger.Tracef("looked up %d unknown sequences for %q (%d unowned) in %v%s from %q",
		len(unknownSeqs), w.modelUUID, unownedCount, elapsed, rate, beingsC.Name)
	return nil
}

// sync updates the watcher knowledge from the database, and
// queues events to observing channels. It fetches the last two time
// slots and compares the union of both to the in-memory state.
func (w *Watcher) sync() error {
	session := w.pings.Database.Session.Copy()
	defer session.Close()
	pings, err := w.lookupPings(session)
	if err != nil {
		return err
	}
	dead, err := w.lookForDead(pings)
	if err != nil {
		return err
	}
	alive, unknownSeqs, err := w.handleAlive(pings)
	if err != nil {
		return err
	}
	err = w.lookupUnknownSeqs(unknownSeqs, dead, session)
	if err != nil {
		return err
	}

	// Pingers that were known to be alive and haven't reported
	// in the last two slots are now considered dead. Dispatch
	// the respective events and forget their sequences.
	for seq, key := range w.beingKey {
		if dead[seq] || !alive[seq] {
			logger.Tracef("[%s] removing seq=%d with key %q", w.modelUUID[:6], seq, key)
			delete(w.beingKey, seq)
			delete(w.beingSeq, key)
			for _, ch := range w.watches[key] {
				w.pending = append(w.pending, event{ch, key, false})
			}
		}
	}
	return nil
}

// Pinger periodically reports that a specific key is alive, so that
// watchers interested in that fact can react appropriately.
type Pinger struct {
	modelUUID    string
	mu           sync.Mutex
	tomb         tomb.Tomb
	base         *mgo.Collection
	pings        *mgo.Collection
	started      bool
	beingKey     string
	beingSeq     int64
	fieldKey     string // hex(beingSeq / 63)
	fieldBit     uint64 // 1 << (beingSeq % 63)
	lastSlot     int64
	delta        time.Duration
	recorderFunc func() PingRecorder
}

type PingRecorder interface {
	Ping(modelUUID string, slot int64, fieldKey string, fieldBit uint64) error
}

// NewPinger returns a new Pinger to report that key is alive.
// It starts reporting after Start is called.
func NewPinger(base *mgo.Collection, modelTag names.ModelTag, key string, recorderFunc func() PingRecorder) *Pinger {
	return &Pinger{
		base:         base,
		pings:        pingsC(base),
		beingKey:     key,
		modelUUID:    modelTag.Id(),
		recorderFunc: recorderFunc,
	}
}

// Start starts periodically reporting that p's key is alive.
func (p *Pinger) Start() error {
	p.mu.Lock()
	defer p.mu.Unlock()
	if p.started {
		return errors.Errorf("pinger already started")
	}
	p.tomb = tomb.Tomb{}
	if err := p.prepare(); err != nil {
		return errors.Trace(err)
	}
	logger.Tracef("[%s] starting pinger for %q with seq=%d", p.modelUUID[:6], p.beingKey, p.beingSeq)
	if err := p.ping(); err != nil {
		return errors.Trace(err)
	}
	p.started = true
	p.tomb.Go(func() error {
		err := p.loop()
		cause := errors.Cause(err)
		// tomb expects ErrDying or ErrStillAlive as
		// exact values, so we need to log and unwrap
		// the error first.
		if err != nil && cause != tomb.ErrDying {
			logger.Infof("pinger loop failed: %v", err)
		}
		return cause
	})
	return nil
}

// Kill is part of the worker.Worker interface.
func (p *Pinger) Kill() {
	p.tomb.Kill(nil)
}

// Wait returns when the Pinger has stopped, and returns the first error
// it encountered.
func (p *Pinger) Wait() error {
	return p.tomb.Wait()
}
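
// The following is an illustrative sketch of the Pinger lifecycle; it is not
// part of the original package. exampleRecorder is a hypothetical PingRecorder
// that writes the ping document directly with an upsert, in the spirit of the
// single-update ping described in the package comment; the key "machine-0" is
// also hypothetical.
type exampleRecorder struct {
	pings *mgo.Collection
}

func (r exampleRecorder) Ping(modelUUID string, slot int64, fieldKey string, fieldBit uint64) error {
	_, err := r.pings.UpsertId(docIDInt64(modelUUID, slot), bson.D{
		{"$set", bson.D{{"slot", slot}}},
		{"$inc", bson.D{{"alive." + fieldKey, fieldBit}}},
	})
	return err
}

func examplePingerLifecycle(base *mgo.Collection, modelTag names.ModelTag) error {
	p := NewPinger(base, modelTag, "machine-0", func() PingRecorder {
		return exampleRecorder{pings: pingsC(base)}
	})
	if err := p.Start(); err != nil {
		return err
	}
	// The key is now reported alive once per slot until Stop is called.
	return p.Stop()
}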

// Stop stops p's periodical ping.
// Watchers will not notice p has stopped pinging until the
// previous ping times out.
func (p *Pinger) Stop() error {
	p.mu.Lock()
	defer p.mu.Unlock()
	if p.started {
		logger.Tracef("[%s] stopping pinger for %q with seq=%d", p.modelUUID[:6], p.beingKey, p.beingSeq)
	}
	p.tomb.Kill(nil)
	err := p.tomb.Wait()
	// TODO ping one more time to guarantee a late timeout.
	p.started = false
	return errors.Trace(err)
}

// KillForTesting stops p's periodical ping and immediately reports that it is dead.
// TODO(ericsnow) We should be able to drop this and the two kill* methods.
func (p *Pinger) KillForTesting() error {
	p.mu.Lock()
	defer p.mu.Unlock()
	if p.started {
		logger.Tracef("killing pinger for %q (was started)", p.beingKey)
		return p.killStarted()
	}
	logger.Tracef("killing pinger for %q (was stopped)", p.beingKey)
	return p.killStopped()
}

// killStarted kills the pinger while it is running, by first
// stopping it and then recording in the last pinged slot that
// the pinger was killed.
func (p *Pinger) killStarted() error {
	p.tomb.Kill(nil)
	killErr := p.tomb.Wait()
	p.started = false

	slot := p.lastSlot
	udoc := bson.D{
		{"$set", bson.D{{"slot", slot}}},
		{"$inc", bson.D{{"dead." + p.fieldKey, p.fieldBit}}}}
	session := p.pings.Database.Session.Copy()
	defer session.Close()
	pings := p.pings.With(session)
	if _, err := pings.UpsertId(docIDInt64(p.modelUUID, slot), udoc); err != nil {
		return errors.Trace(err)
	}
	return errors.Trace(killErr)
}

// killStopped kills the pinger while it is not running, by
// first allocating a new sequence, and then atomically recording
// the new sequence both as alive and dead at once.
func (p *Pinger) killStopped() error {
	if err := p.prepare(); err != nil {
		return err
	}
	// TODO(perrito666) 2016-05-02 lp:1558657
	slot := timeSlot(time.Now(), p.delta)
	udoc := bson.D{
		{"$set", bson.D{{"slot", slot}}},
		{"$inc", bson.D{
			{"dead." + p.fieldKey, p.fieldBit},
			{"alive." + p.fieldKey, p.fieldBit},
		}}}
	session := p.pings.Database.Session.Copy()
	defer session.Close()
	pings := p.pings.With(session)
	_, err := pings.UpsertId(docIDInt64(p.modelUUID, slot), udoc)
	return errors.Trace(err)
}

// loop is the main pinger loop that runs while it is
// in started state.
func (p *Pinger) loop() error {
	for {
		select {
		case <-p.tomb.Dying():
			return errors.Trace(tomb.ErrDying)
		case <-time.After(time.Duration(float64(period+1)*0.75) * time.Second):
			if err := p.ping(); err != nil {
				return errors.Trace(err)
			}
		}
	}
}
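
// examplePingInterval is an illustrative sketch, not part of the original
// package: it spells out the wake-up interval used by the loop above. With the
// default period of 30 seconds, (period+1)*0.75 gives a ping roughly every
// 23.25 seconds, comfortably shorter than a slot, so every slot the pinger is
// alive for receives at least one ping.
func examplePingInterval() time.Duration {
	return time.Duration(float64(period+1)*0.75) * time.Second // ~23.25s for period == 30
}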

// prepare allocates a new unique sequence for the
// pinger key and prepares the pinger to use it.
func (p *Pinger) prepare() error {
	change := mgo.Change{
		Update:    bson.D{{"$inc", bson.D{{"seq", int64(1)}}}},
		Upsert:    true,
		ReturnNew: true,
	}
	session := p.base.Database.Session.Copy()
	defer session.Close()
	base := p.base.With(session)
	seqs := seqsC(base)
	var seq struct{ Seq int64 }
	seqID := docIDStr(p.modelUUID, "beings")
	if _, err := seqs.FindId(seqID).Apply(change, &seq); err != nil {
		return errors.Trace(err)
	}
	p.beingSeq = seq.Seq
	p.fieldKey = fmt.Sprintf("%x", p.beingSeq/63)
	p.fieldBit = 1 << uint64(p.beingSeq%63)
	p.lastSlot = 0
	beings := beingsC(base)
	return errors.Trace(beings.Insert(
		beingInfo{
			DocID: docIDInt64(p.modelUUID, p.beingSeq),
			Seq:   p.beingSeq,
			Key:   p.beingKey,
		},
	))
}

// ping updates the current time slot with the
// sequence in use by the pinger.
func (p *Pinger) ping() (err error) {
	logger.Tracef("[%s] pinging %q with seq=%d", p.modelUUID[:6], p.beingKey, p.beingSeq)
	defer func() {
		// If the session is killed from underneath us, it panics when we
		// try to copy it, so deal with that here.
		if v := recover(); v != nil {
			err = fmt.Errorf("%v", v)
		}
	}()
	if p.delta == 0 {
		session := p.pings.Database.Session.Copy()
		defer session.Close()
		base := p.base.With(session)
		delta, err := clockDelta(base)
		if err != nil {
			return errors.Trace(err)
		}
		p.delta = delta
	}
	// TODO(perrito666) 2016-05-02 lp:1558657
	slot := timeSlot(time.Now(), p.delta)
	if slot == p.lastSlot {
		// Never, ever, ping the same slot twice.
		// The increment below would corrupt the slot.
		return nil
	}
	p.lastSlot = slot
	p.recorderFunc().Ping(p.modelUUID, slot, p.fieldKey, p.fieldBit)
	return errors.Trace(err)
}

// collapsedBeingsInfo tracks the result of aggregating all of the items in the
// beings collection by their key.
type collapsedBeingsInfo struct {
	Key  string  `bson:"_id"`
	Seqs []int64 `bson:"seqs"`
}

// clockDelta returns the approximate skew between
// the local clock and the database clock.
func clockDelta(c *mgo.Collection) (time.Duration, error) {
	var server struct {
		time.Time `bson:"retval"`
	}
	var isMaster struct {
		LocalTime time.Time `bson:"localTime"`
	}
	var after time.Time
	var before time.Time
	var serverDelay time.Duration
	supportsMasterLocalTime := true
	session := c.Database.Session.Copy()
	defer session.Close()
	db := c.Database.With(session)
	for i := 0; i < 10; i++ {
		if supportsMasterLocalTime {
			// Try isMaster.localTime, which is present since MongoDB 2.2
			// and does not require admin privileges.
			// TODO(perrito666) 2016-05-02 lp:1558657
			before = time.Now()
			err := db.Run("isMaster", &isMaster)
			// TODO(perrito666) 2016-05-02 lp:1558657
			after = time.Now()
			if err != nil {
				return 0, errors.Trace(err)
			}
			if isMaster.LocalTime.IsZero() {
				supportsMasterLocalTime = false
				continue
			} else {
				serverDelay = isMaster.LocalTime.Sub(before)
			}
		} else {
			// If MongoDB doesn't have localTime as part of the
			// isMaster result, it means that the server is likely
			// a MongoDB older than 2.2.
			//
			// Falling back to 'eval' works fine on versions older than
			// 2.4 where it does not require admin privileges.
			//
			// NOTE: 'eval' takes a global write lock unless you
			// specify 'nolock' (which we are not doing below, for
			// no apparent reason), so it is quite likely that the
			// eval could take a relatively long time to acquire
			// the lock and thus cause a retry on the callDelay
			// check below on a busy server.
			// TODO(perrito666) 2016-05-02 lp:1558657
			before = time.Now()
			err := db.Run(bson.D{{"$eval", "function() { return new Date(); }"}}, &server)
			// TODO(perrito666) 2016-05-02 lp:1558657
			after = time.Now()
			if err != nil {
				return 0, errors.Trace(err)
			}
			serverDelay = server.Sub(before)
		}
		// If the call to the server takes longer than a few seconds we
		// retry it a couple more times before giving up. It is unclear
		// why the retry would help at all here.
		//
		// If the server takes longer than the specified amount of time
		// on every single try, then we simply give up.
		callDelay := after.Sub(before)
		if callDelay > 5*time.Second {
			continue
		}
		return serverDelay, nil
	}
	return 0, errors.Errorf("cannot synchronize clock with database server")
}

// timeSlot returns the current time slot, in seconds since the
// epoch, for the provided now time. The delta skew is applied
// to the now time to improve the synchronization with a
// centrally agreed time.
//
// The result of this method may be manipulated for test purposes
// by fakeTimeSlot and realTimeSlot.
func timeSlot(now time.Time, delta time.Duration) int64 {
	fakeMutex.Lock()
	fake := !fakeNow.IsZero()
	if fake {
		now = fakeNow
	}
	slot := now.Add(delta).Unix()
	slot -= slot % period
	if fake {
		slot += int64(fakeOffset) * period
	}
	fakeMutex.Unlock()
	return slot
}
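
// exampleTimeSlot is an illustrative sketch, not part of the original package:
// it shows how a wall-clock time is rounded down to a 30-second slot boundary.
// With delta = 0, a Unix time of 1,000,000,010 falls into slot 999,999,990,
// the previous multiple of period.
func exampleTimeSlot() int64 {
	now := time.Unix(1000000010, 0)
	return timeSlot(now, 0) // 999999990 when period == 30
}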

var (
	fakeMutex  sync.Mutex // protects fakeOffset, fakeNow
	fakeNow    time.Time
	fakeOffset int
)

// fakeTimeSlot hardcodes the slot time returned by the timeSlot
// function for testing purposes. The offset parameter is the slot
// position to return: offsets +1 and -1 are +period and -period
// seconds from slot 0, respectively.
func fakeTimeSlot(offset int) {
	fakeMutex.Lock()
	if fakeNow.IsZero() {
		fakeNow = time.Now()
	}
	fakeOffset = offset
	fakeMutex.Unlock()
	logger.Infof("faking presence to time slot %d", offset)
}

// realTimeSlot disables the hardcoding introduced by fakeTimeSlot.
func realTimeSlot() {
	fakeMutex.Lock()
	fakeNow = time.Time{}
	fakeOffset = 0
	fakeMutex.Unlock()
	logger.Infof("not faking presence time. Real time slot in use.")
}

func seqsC(base *mgo.Collection) *mgo.Collection {
	return base.Database.C(base.Name + ".seqs")
}

func beingsC(base *mgo.Collection) *mgo.Collection {
	return base.Database.C(base.Name + ".beings")
}

func pingsC(base *mgo.Collection) *mgo.Collection {
	return base.Database.C(base.Name + ".pings")
}

func removeModelFromCollection(coll *mgo.Collection, modelUUID string) error {
	modelIDMatch := bson.M{"_id": bson.RegEx{"^" + modelUUID + ":", ""}}
	if changed, err := coll.RemoveAll(modelIDMatch); err != nil {
		if err == mgo.ErrNotFound {
			// Not a worthy error.
			return nil
		}
		return errors.Trace(err)
	} else {
		logger.Debugf("removed %d entries from %q for model %q",
			changed.Removed, coll.FullName, modelUUID)
	}
	return nil
}

// RemovePresenceForModel removes all of the records of entities for a given model
// across all of the collections.
func RemovePresenceForModel(base *mgo.Collection, modelTag names.ModelTag) error {
	errs := make([]error, 0)
	for _, f := range []func(*mgo.Collection) *mgo.Collection{pingsC, beingsC, seqsC} {
		err := removeModelFromCollection(f(base), modelTag.Id())
		if err != nil {
			errs = append(errs, err)
		}
	}
	if len(errs) != 0 {
		return errors.Errorf("errors removing presence for model %q: %v", modelTag.Id(), errs)
	}
	return nil
}
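
// exampleDerivedCollections is an illustrative sketch, not part of the original
// package: given a hypothetical base collection named "presence", the helper
// constructors above derive the sequence, beings and pings collections from it
// by suffixing the base collection's name.
func exampleDerivedCollections(base *mgo.Collection) []string {
	return []string{
		seqsC(base).Name,   // e.g. "presence.seqs"
		beingsC(base).Name, // e.g. "presence.beings"
		pingsC(base).Name,  // e.g. "presence.pings"
	}
}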