github.com/mhilton/juju-juju@v0.0.0-20150901100907-a94dd2c73455/state/presence/presence.go (about) 1 // Copyright 2012, 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 // The presence package implements an interface for observing liveness 5 // of arbitrary keys (agents, processes, etc) on top of MongoDB. 6 // The design works by periodically updating the database so that 7 // watchers can tell an arbitrary key is alive. 8 package presence 9 10 import ( 11 "fmt" 12 "strconv" 13 "sync" 14 "time" 15 16 "github.com/juju/errors" 17 "github.com/juju/loggo" 18 "github.com/juju/names" 19 "gopkg.in/mgo.v2" 20 "gopkg.in/mgo.v2/bson" 21 "launchpad.net/tomb" 22 ) 23 24 var logger = loggo.GetLogger("juju.state.presence") 25 26 type Presencer interface { 27 AgentPresence() (bool, error) 28 SetAgentPresence() (*Pinger, error) 29 WaitAgentPresence(time.Duration) error 30 } 31 32 // docIDInt64 generates a globally unique id value 33 // where the environment uuid is prefixed to the 34 // given int64 localID. 35 func docIDInt64(envUUID string, localID int64) string { 36 return envUUID + ":" + strconv.FormatInt(localID, 10) 37 } 38 39 // docIDStr generates a globally unique id value 40 // where the environment uuid is prefixed to the 41 // given string localID. 42 func docIDStr(envUUID string, localID string) string { 43 return envUUID + ":" + localID 44 } 45 46 // The implementation works by assigning a unique sequence number to each 47 // pinger that is alive, and the pinger is then responsible for 48 // periodically updating the current time slot document with its 49 // sequence number so that watchers can tell it is alive. 50 // 51 // There is only one time slot document per time slot, per environment. The 52 // internal implementation of the time slot document is as follows: 53 // 54 // { 55 // "_id": <environ UUID>:<time slot>, 56 // "slot": <slot>, 57 // "env-uuid": <environ UUID>, 58 // "alive": { hex(<pinger seq> / 63) : (1 << (<pinger seq> % 63) | <others>) }, 59 // "dead": { hex(<pinger seq> / 63) : (1 << (<pinger seq> % 63) | <others>) }, 60 // } 61 // 62 // All pingers that have their sequence number under "alive" and not 63 // under "dead" are currently alive. This design enables implementing 64 // a ping with a single update operation, a kill with another operation, 65 // and obtaining liveness data with a single query that returns two 66 // documents (the last two time slots). 67 // 68 // A new pinger sequence is obtained every time a pinger starts by atomically 69 // incrementing a counter in a document in a helper collection. There is only 70 // one such document per environment. That sequence number is then inserted 71 // into the beings collection to establish the mapping between pinger sequence 72 // and key. 73 74 // BUG(gn): The pings and beings collection currently grow without bound. 75 76 // A Watcher can watch any number of pinger keys for liveness changes. 77 type Watcher struct { 78 envUUID string 79 tomb tomb.Tomb 80 base *mgo.Collection 81 pings *mgo.Collection 82 beings *mgo.Collection 83 84 // delta is an approximate clock skew between the local system 85 // clock and the database clock. 86 delta time.Duration 87 88 // beingKey and beingSeq are the pinger seq <=> key mappings. 89 // Entries in these maps are considered alive. 90 beingKey map[int64]string 91 beingSeq map[string]int64 92 93 // watches has the per-key observer channels from Watch/Unwatch. 94 watches map[string][]chan<- Change 95 96 // pending contains all the events to be dispatched to the watcher 97 // channels. They're queued during processing and flushed at the 98 // end to simplify the algorithm. 99 pending []event 100 101 // request is used to deliver requests from the public API into 102 // the the gorotuine loop. 103 request chan interface{} 104 105 // syncDone contains pending done channels from sync requests. 106 syncDone []chan bool 107 108 // next will dispatch when it's time to sync the database 109 // knowledge. It's maintained here so that ForceRefresh 110 // can manipulate it to force a sync sooner. 111 next <-chan time.Time 112 } 113 114 type event struct { 115 ch chan<- Change 116 key string 117 alive bool 118 } 119 120 // Change holds a liveness change notification. 121 type Change struct { 122 Key string 123 Alive bool 124 } 125 126 // NewWatcher returns a new Watcher. 127 func NewWatcher(base *mgo.Collection, envTag names.EnvironTag) *Watcher { 128 w := &Watcher{ 129 envUUID: envTag.Id(), 130 base: base, 131 pings: pingsC(base), 132 beings: beingsC(base), 133 beingKey: make(map[int64]string), 134 beingSeq: make(map[string]int64), 135 watches: make(map[string][]chan<- Change), 136 request: make(chan interface{}), 137 } 138 go func() { 139 err := w.loop() 140 cause := errors.Cause(err) 141 // tomb expects ErrDying or ErrStillAlive as 142 // exact values, so we need to log and unwrap 143 // the error first. 144 if err != nil && cause != tomb.ErrDying { 145 logger.Infof("watcher loop failed: %v", err) 146 } 147 w.tomb.Kill(cause) 148 w.tomb.Done() 149 }() 150 return w 151 } 152 153 // Stop stops all the watcher activities. 154 func (w *Watcher) Stop() error { 155 w.tomb.Kill(nil) 156 return errors.Trace(w.tomb.Wait()) 157 } 158 159 // Dead returns a channel that is closed when the watcher has stopped. 160 func (w *Watcher) Dead() <-chan struct{} { 161 return w.tomb.Dead() 162 } 163 164 // Err returns the error with which the watcher stopped. 165 // It returns nil if the watcher stopped cleanly, tomb.ErrStillAlive 166 // if the watcher is still running properly, or the respective error 167 // if the watcher is terminating or has terminated with an error. 168 func (w *Watcher) Err() error { 169 return w.tomb.Err() 170 } 171 172 type reqWatch struct { 173 key string 174 ch chan<- Change 175 } 176 177 type reqUnwatch struct { 178 key string 179 ch chan<- Change 180 } 181 182 type reqSync struct { 183 done chan bool 184 } 185 186 type reqAlive struct { 187 key string 188 result chan bool 189 } 190 191 func (w *Watcher) sendReq(req interface{}) { 192 select { 193 case w.request <- req: 194 case <-w.tomb.Dying(): 195 } 196 } 197 198 // Watch starts watching the liveness of key. An event will 199 // be sent onto ch to report the initial status for the key, and 200 // from then on a new event will be sent whenever a change is 201 // detected. Change values sent to the channel must be consumed, 202 // or the whole watcher will blocked. 203 func (w *Watcher) Watch(key string, ch chan<- Change) { 204 w.sendReq(reqWatch{key, ch}) 205 } 206 207 // Unwatch stops watching the liveness of key via ch. 208 func (w *Watcher) Unwatch(key string, ch chan<- Change) { 209 w.sendReq(reqUnwatch{key, ch}) 210 } 211 212 // StartSync forces the watcher to load new events from the database. 213 func (w *Watcher) StartSync() { 214 w.sendReq(reqSync{nil}) 215 } 216 217 // Sync forces the watcher to load new events from the database and blocks 218 // until all events have been dispatched. 219 func (w *Watcher) Sync() { 220 done := make(chan bool) 221 w.sendReq(reqSync{done}) 222 select { 223 case <-done: 224 case <-w.tomb.Dying(): 225 } 226 } 227 228 // Alive returns whether the key is currently considered alive by w, 229 // or an error in case the watcher is dying. 230 func (w *Watcher) Alive(key string) (bool, error) { 231 result := make(chan bool, 1) 232 w.sendReq(reqAlive{key, result}) 233 var alive bool 234 select { 235 case alive = <-result: 236 case <-w.tomb.Dying(): 237 return false, errors.Errorf("cannot check liveness: watcher is dying") 238 } 239 return alive, nil 240 } 241 242 // period is the length of each time slot in seconds. 243 // It's not a time.Duration because the code is more convenient like 244 // this and also because sub-second timings don't work as the slot 245 // identifier is an int64 in seconds. 246 var period int64 = 30 247 248 // loop implements the main watcher loop. 249 func (w *Watcher) loop() error { 250 var err error 251 if w.delta, err = clockDelta(w.base); err != nil { 252 return errors.Trace(err) 253 } 254 w.next = time.After(0) 255 for { 256 select { 257 case <-w.tomb.Dying(): 258 return errors.Trace(tomb.ErrDying) 259 case <-w.next: 260 w.next = time.After(time.Duration(period) * time.Second) 261 syncDone := w.syncDone 262 w.syncDone = nil 263 if err := w.sync(); err != nil { 264 return errors.Trace(err) 265 } 266 w.flush() 267 for _, done := range syncDone { 268 close(done) 269 } 270 case req := <-w.request: 271 w.handle(req) 272 w.flush() 273 } 274 } 275 } 276 277 // flush sends all pending events to their respective channels. 278 func (w *Watcher) flush() { 279 // w.pending may get new requests as we handle other requests. 280 for i := 0; i < len(w.pending); i++ { 281 e := &w.pending[i] 282 for e.ch != nil { 283 select { 284 case <-w.tomb.Dying(): 285 return 286 case req := <-w.request: 287 w.handle(req) 288 continue 289 case e.ch <- Change{e.key, e.alive}: 290 } 291 break 292 } 293 } 294 w.pending = w.pending[:0] 295 } 296 297 // handle deals with requests delivered by the public API 298 // onto the background watcher goroutine. 299 func (w *Watcher) handle(req interface{}) { 300 logger.Tracef("got request: %#v", req) 301 switch r := req.(type) { 302 case reqSync: 303 w.next = time.After(0) 304 if r.done != nil { 305 w.syncDone = append(w.syncDone, r.done) 306 } 307 case reqWatch: 308 for _, ch := range w.watches[r.key] { 309 if ch == r.ch { 310 panic("adding channel twice for same key") 311 } 312 } 313 w.watches[r.key] = append(w.watches[r.key], r.ch) 314 _, alive := w.beingSeq[r.key] 315 w.pending = append(w.pending, event{r.ch, r.key, alive}) 316 case reqUnwatch: 317 watches := w.watches[r.key] 318 for i, ch := range watches { 319 if ch == r.ch { 320 watches[i] = watches[len(watches)-1] 321 w.watches[r.key] = watches[:len(watches)-1] 322 break 323 } 324 } 325 for i := range w.pending { 326 e := &w.pending[i] 327 if e.key == r.key && e.ch == r.ch { 328 e.ch = nil 329 } 330 } 331 case reqAlive: 332 _, alive := w.beingSeq[r.key] 333 r.result <- alive 334 default: 335 panic(fmt.Errorf("unknown request: %T", req)) 336 } 337 } 338 339 type beingInfo struct { 340 DocID string `bson:"_id,omitempty"` 341 Seq int64 `bson:"seq,omitempty"` 342 EnvUUID string `bson:"env-uuid,omitempty"` 343 Key string `bson:"key,omitempty"` 344 } 345 346 type pingInfo struct { 347 DocID string `bson:"_id,omitempty"` 348 Slot int64 `bson:"slot,omitempty"` 349 Alive map[string]int64 `bson:",omitempty"` 350 Dead map[string]int64 `bson:",omitempty"` 351 } 352 353 func (w *Watcher) findAllBeings() (map[int64]beingInfo, error) { 354 beings := make([]beingInfo, 0) 355 session := w.beings.Database.Session.Copy() 356 defer session.Close() 357 beingsC := w.beings.With(session) 358 359 err := beingsC.Find(bson.D{{"env-uuid", w.envUUID}}).All(&beings) 360 if err != nil { 361 return nil, err 362 } 363 beingInfos := make(map[int64]beingInfo, len(beings)) 364 for _, being := range beings { 365 beingInfos[being.Seq] = being 366 } 367 return beingInfos, nil 368 } 369 370 // sync updates the watcher knowledge from the database, and 371 // queues events to observing channels. It fetches the last two time 372 // slots and compares the union of both to the in-memory state. 373 func (w *Watcher) sync() error { 374 var allBeings map[int64]beingInfo 375 if len(w.beingKey) == 0 { 376 // The very first time we sync, we grab all ever-known beings, 377 // so we don't have to look them up one-by-one 378 var err error 379 if allBeings, err = w.findAllBeings(); err != nil { 380 return errors.Trace(err) 381 } 382 } 383 s := timeSlot(time.Now(), w.delta) 384 slot := docIDInt64(w.envUUID, s) 385 previousSlot := docIDInt64(w.envUUID, s-period) 386 session := w.pings.Database.Session.Copy() 387 defer session.Close() 388 pings := w.pings.With(session) 389 var ping []pingInfo 390 q := bson.D{{"$or", []pingInfo{{DocID: slot}, {DocID: previousSlot}}}} 391 err := pings.Find(q).All(&ping) 392 if err != nil && err == mgo.ErrNotFound { 393 return errors.Trace(err) 394 } 395 396 // Learn about all enforced deaths. 397 dead := make(map[int64]bool) 398 for i := range ping { 399 for key, value := range ping[i].Dead { 400 k, err := strconv.ParseInt(key, 16, 64) 401 if err != nil { 402 err = errors.Annotatef(err, "presence cannot parse dead key: %q", key) 403 panic(err) 404 } 405 k *= 63 406 for i := int64(0); i < 63 && value > 0; i++ { 407 on := value&1 == 1 408 value >>= 1 409 if !on { 410 continue 411 } 412 seq := k + i 413 dead[seq] = true 414 logger.Tracef("found seq=%d dead", seq) 415 } 416 } 417 } 418 419 // Learn about all the pingers that reported and queue 420 // events for those that weren't known to be alive and 421 // are not reportedly dead either. 422 beingsC := w.beings.With(session) 423 alive := make(map[int64]bool) 424 being := beingInfo{} 425 for i := range ping { 426 for key, value := range ping[i].Alive { 427 k, err := strconv.ParseInt(key, 16, 64) 428 if err != nil { 429 err = errors.Annotatef(err, "presence cannot parse alive key: %q", key) 430 panic(err) 431 } 432 k *= 63 433 for i := int64(0); i < 63 && value > 0; i++ { 434 on := value&1 == 1 435 value >>= 1 436 if !on { 437 continue 438 } 439 seq := k + i 440 alive[seq] = true 441 if _, ok := w.beingKey[seq]; ok { 442 continue 443 } 444 // Check if the being exists in the 'all' map, 445 // otherwise do a single lookup in mongo 446 var ok bool 447 if being, ok = allBeings[seq]; !ok { 448 err := beingsC.Find(bson.D{{"_id", docIDInt64(w.envUUID, seq)}}).One(&being) 449 if err == mgo.ErrNotFound { 450 logger.Tracef("found seq=%d unowned", seq) 451 continue 452 } else if err != nil { 453 return errors.Trace(err) 454 } 455 } 456 cur := w.beingSeq[being.Key] 457 if cur < seq { 458 delete(w.beingKey, cur) 459 } else { 460 // Current sequence is more recent. 461 continue 462 } 463 w.beingKey[seq] = being.Key 464 w.beingSeq[being.Key] = seq 465 if cur > 0 || dead[seq] { 466 continue 467 } 468 logger.Tracef("found seq=%d alive with key %q", seq, being.Key) 469 for _, ch := range w.watches[being.Key] { 470 w.pending = append(w.pending, event{ch, being.Key, true}) 471 } 472 } 473 } 474 } 475 476 // Pingers that were known to be alive and haven't reported 477 // in the last two slots are now considered dead. Dispatch 478 // the respective events and forget their sequences. 479 for seq, key := range w.beingKey { 480 if dead[seq] || !alive[seq] { 481 delete(w.beingKey, seq) 482 delete(w.beingSeq, key) 483 for _, ch := range w.watches[key] { 484 w.pending = append(w.pending, event{ch, key, false}) 485 } 486 } 487 } 488 return nil 489 } 490 491 // Pinger periodically reports that a specific key is alive, so that 492 // watchers interested on that fact can react appropriately. 493 type Pinger struct { 494 envUUID string 495 mu sync.Mutex 496 tomb tomb.Tomb 497 base *mgo.Collection 498 pings *mgo.Collection 499 started bool 500 beingKey string 501 beingSeq int64 502 fieldKey string // hex(beingKey / 63) 503 fieldBit uint64 // 1 << (beingKey%63) 504 lastSlot int64 505 delta time.Duration 506 } 507 508 // NewPinger returns a new Pinger to report that key is alive. 509 // It starts reporting after Start is called. 510 func NewPinger(base *mgo.Collection, envTag names.EnvironTag, key string) *Pinger { 511 return &Pinger{ 512 base: base, 513 pings: pingsC(base), 514 beingKey: key, 515 envUUID: envTag.Id(), 516 } 517 } 518 519 // Start starts periodically reporting that p's key is alive. 520 func (p *Pinger) Start() error { 521 p.mu.Lock() 522 defer p.mu.Unlock() 523 if p.started { 524 return errors.Errorf("pinger already started") 525 } 526 p.tomb = tomb.Tomb{} 527 if err := p.prepare(); err != nil { 528 return errors.Trace(err) 529 } 530 logger.Tracef("starting pinger for %q with seq=%d", p.beingKey, p.beingSeq) 531 if err := p.ping(); err != nil { 532 return errors.Trace(err) 533 } 534 p.started = true 535 go func() { 536 err := p.loop() 537 cause := errors.Cause(err) 538 // tomb expects ErrDying or ErrStillAlive as 539 // exact values, so we need to log and unwrap 540 // the error first. 541 if err != nil && cause != tomb.ErrDying { 542 logger.Infof("pinger loop failed: %v", err) 543 } 544 p.tomb.Kill(cause) 545 p.tomb.Done() 546 }() 547 return nil 548 } 549 550 // Stop stops p's periodical ping. 551 // Watchers will not notice p has stopped pinging until the 552 // previous ping times out. 553 func (p *Pinger) Stop() error { 554 p.mu.Lock() 555 defer p.mu.Unlock() 556 if p.started { 557 logger.Tracef("stopping pinger for %q with seq=%d", p.beingKey, p.beingSeq) 558 } 559 p.tomb.Kill(nil) 560 err := p.tomb.Wait() 561 // TODO ping one more time to guarantee a late timeout. 562 p.started = false 563 return errors.Trace(err) 564 565 } 566 567 // Kill stops p's periodical ping and immediately reports that it is dead. 568 func (p *Pinger) Kill() error { 569 p.mu.Lock() 570 defer p.mu.Unlock() 571 if p.started { 572 logger.Tracef("killing pinger for %q (was started)", p.beingKey) 573 return p.killStarted() 574 } 575 logger.Tracef("killing pinger for %q (was stopped)", p.beingKey) 576 return p.killStopped() 577 } 578 579 // killStarted kills the pinger while it is running, by first 580 // stopping it and then recording in the last pinged slot that 581 // the pinger was killed. 582 func (p *Pinger) killStarted() error { 583 p.tomb.Kill(nil) 584 killErr := p.tomb.Wait() 585 p.started = false 586 587 slot := p.lastSlot 588 udoc := bson.D{ 589 {"$set", bson.D{{"slot", slot}}}, 590 {"$inc", bson.D{{"dead." + p.fieldKey, p.fieldBit}}}} 591 session := p.pings.Database.Session.Copy() 592 defer session.Close() 593 pings := p.pings.With(session) 594 if _, err := pings.UpsertId(docIDInt64(p.envUUID, slot), udoc); err != nil { 595 return errors.Trace(err) 596 } 597 return errors.Trace(killErr) 598 } 599 600 // killStopped kills the pinger while it is not running, by 601 // first allocating a new sequence, and then atomically recording 602 // the new sequence both as alive and dead at once. 603 func (p *Pinger) killStopped() error { 604 if err := p.prepare(); err != nil { 605 return err 606 } 607 slot := timeSlot(time.Now(), p.delta) 608 udoc := bson.D{ 609 {"$set", bson.D{{"slot", slot}}}, 610 {"$inc", bson.D{ 611 {"dead." + p.fieldKey, p.fieldBit}, 612 {"alive." + p.fieldKey, p.fieldBit}, 613 }}} 614 session := p.pings.Database.Session.Copy() 615 defer session.Close() 616 pings := p.pings.With(session) 617 _, err := pings.UpsertId(docIDInt64(p.envUUID, slot), udoc) 618 return errors.Trace(err) 619 } 620 621 // loop is the main pinger loop that runs while it is 622 // in started state. 623 func (p *Pinger) loop() error { 624 for { 625 select { 626 case <-p.tomb.Dying(): 627 return errors.Trace(tomb.ErrDying) 628 case <-time.After(time.Duration(float64(period+1)*0.75) * time.Second): 629 if err := p.ping(); err != nil { 630 return errors.Trace(err) 631 } 632 } 633 } 634 } 635 636 // prepare allocates a new unique sequence for the 637 // pinger key and prepares the pinger to use it. 638 func (p *Pinger) prepare() error { 639 change := mgo.Change{ 640 Update: bson.D{{"$inc", bson.D{{"seq", int64(1)}}}}, 641 Upsert: true, 642 ReturnNew: true, 643 } 644 session := p.base.Database.Session.Copy() 645 defer session.Close() 646 base := p.base.With(session) 647 seqs := seqsC(base) 648 var seq struct{ Seq int64 } 649 seqID := docIDStr(p.envUUID, "beings") 650 if _, err := seqs.FindId(seqID).Apply(change, &seq); err != nil { 651 return errors.Trace(err) 652 } 653 p.beingSeq = seq.Seq 654 p.fieldKey = fmt.Sprintf("%x", p.beingSeq/63) 655 p.fieldBit = 1 << uint64(p.beingSeq%63) 656 p.lastSlot = 0 657 beings := beingsC(base) 658 return errors.Trace(beings.Insert( 659 beingInfo{ 660 DocID: docIDInt64(p.envUUID, p.beingSeq), 661 Seq: p.beingSeq, 662 EnvUUID: p.envUUID, 663 Key: p.beingKey, 664 }, 665 )) 666 } 667 668 // ping records updates the current time slot with the 669 // sequence in use by the pinger. 670 func (p *Pinger) ping() (err error) { 671 logger.Tracef("pinging %q with seq=%d", p.beingKey, p.beingSeq) 672 defer func() { 673 // If the session is killed from underneath us, it panics when we 674 // try to copy it, so deal with that here. 675 if v := recover(); v != nil { 676 if v == "Session already closed" { 677 return 678 } 679 err = fmt.Errorf("%v", v) 680 } 681 }() 682 session := p.pings.Database.Session.Copy() 683 defer session.Close() 684 if p.delta == 0 { 685 base := p.base.With(session) 686 delta, err := clockDelta(base) 687 if err != nil { 688 return errors.Trace(err) 689 } 690 p.delta = delta 691 } 692 slot := timeSlot(time.Now(), p.delta) 693 if slot == p.lastSlot { 694 // Never, ever, ping the same slot twice. 695 // The increment below would corrupt the slot. 696 return nil 697 } 698 p.lastSlot = slot 699 pings := p.pings.With(session) 700 _, err = pings.UpsertId( 701 docIDInt64(p.envUUID, slot), 702 bson.D{ 703 {"$set", bson.D{{"slot", slot}}}, 704 {"$inc", bson.D{{"alive." + p.fieldKey, p.fieldBit}}}, 705 }) 706 return errors.Trace(err) 707 } 708 709 // clockDelta returns the approximate skew between 710 // the local clock and the database clock. 711 func clockDelta(c *mgo.Collection) (time.Duration, error) { 712 var server struct { 713 time.Time `bson:"retval"` 714 } 715 var isMaster struct { 716 LocalTime time.Time `bson:"localTime"` 717 } 718 var after time.Time 719 var before time.Time 720 var serverDelay time.Duration 721 supportsMasterLocalTime := true 722 session := c.Database.Session.Copy() 723 defer session.Close() 724 db := c.Database.With(session) 725 for i := 0; i < 10; i++ { 726 if supportsMasterLocalTime { 727 // Try isMaster.localTime, which is present since MongoDB 2.2 728 // and does not require admin privileges. 729 before = time.Now() 730 err := db.Run("isMaster", &isMaster) 731 after = time.Now() 732 if err != nil { 733 return 0, errors.Trace(err) 734 } 735 if isMaster.LocalTime.IsZero() { 736 supportsMasterLocalTime = false 737 continue 738 } else { 739 serverDelay = isMaster.LocalTime.Sub(before) 740 } 741 } else { 742 // If MongoDB doesn't have localTime as part of 743 // isMaster result, it means that the server is likely 744 // a MongoDB older than 2.2. 745 // 746 // Fallback to 'eval' works fine on versions older than 747 // 2.4 where it does not require admin privileges. 748 // 749 // NOTE: 'eval' takes a global write lock unless you 750 // specify 'nolock' (which we are not doing below, for 751 // no apparent reason), so it is quite likely that the 752 // eval could take a relatively long time to acquire 753 // the lock and thus cause a retry on the callDelay 754 // check below on a busy server. 755 before = time.Now() 756 err := db.Run(bson.D{{"$eval", "function() { return new Date(); }"}}, &server) 757 after = time.Now() 758 if err != nil { 759 return 0, errors.Trace(err) 760 } 761 serverDelay = server.Sub(before) 762 } 763 // If the call to the server takes longer than a few seconds we 764 // retry it a couple more times before giving up. It is unclear 765 // why the retry would help at all here. 766 // 767 // If the server takes longer than the specified amount of time 768 // on every single try, then we simply give up. 769 callDelay := after.Sub(before) 770 if callDelay > 5*time.Second { 771 continue 772 } 773 return serverDelay, nil 774 } 775 return 0, errors.Errorf("cannot synchronize clock with database server") 776 } 777 778 // timeSlot returns the current time slot, in seconds since the 779 // epoch, for the provided now time. The delta skew is applied 780 // to the now time to improve the synchronization with a 781 // centrally agreed time. 782 // 783 // The result of this method may be manipulated for test purposes 784 // by fakeTimeSlot and realTimeSlot. 785 func timeSlot(now time.Time, delta time.Duration) int64 { 786 fakeMutex.Lock() 787 fake := !fakeNow.IsZero() 788 if fake { 789 now = fakeNow 790 } 791 slot := now.Add(delta).Unix() 792 slot -= slot % period 793 if fake { 794 slot += int64(fakeOffset) * period 795 } 796 fakeMutex.Unlock() 797 return slot 798 } 799 800 var ( 801 fakeMutex sync.Mutex // protects fakeOffset, fakeNow 802 fakeNow time.Time 803 fakeOffset int 804 ) 805 806 // fakeTimeSlot hardcodes the slot time returned by the timeSlot 807 // function for testing purposes. The offset parameter is the slot 808 // position to return: offsets +1 and -1 are +period and -period 809 // seconds from slot 0, respectively. 810 func fakeTimeSlot(offset int) { 811 fakeMutex.Lock() 812 if fakeNow.IsZero() { 813 fakeNow = time.Now() 814 } 815 fakeOffset = offset 816 fakeMutex.Unlock() 817 logger.Infof("faking presence to time slot %d", offset) 818 } 819 820 // realTimeSlot disables the hardcoding introduced by fakeTimeSlot. 821 func realTimeSlot() { 822 fakeMutex.Lock() 823 fakeNow = time.Time{} 824 fakeOffset = 0 825 fakeMutex.Unlock() 826 logger.Infof("not faking presence time. Real time slot in use.") 827 } 828 829 func seqsC(base *mgo.Collection) *mgo.Collection { 830 return base.Database.C(base.Name + ".seqs") 831 } 832 833 func beingsC(base *mgo.Collection) *mgo.Collection { 834 return base.Database.C(base.Name + ".beings") 835 } 836 837 func pingsC(base *mgo.Collection) *mgo.Collection { 838 return base.Database.C(base.Name + ".pings") 839 }