github.com/wallyworld/juju@v0.0.0-20161013125918-6cf1bc9d917a/worker/uniter/remotestate/watcher.go (about) 1 // Copyright 2012-2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package remotestate 5 6 import ( 7 "sync" 8 "time" 9 10 "github.com/juju/errors" 11 "github.com/juju/loggo" 12 "gopkg.in/juju/names.v2" 13 14 "github.com/juju/juju/apiserver/params" 15 "github.com/juju/juju/core/leadership" 16 "github.com/juju/juju/watcher" 17 "github.com/juju/juju/worker" 18 "github.com/juju/juju/worker/catacomb" 19 ) 20 21 var logger = loggo.GetLogger("juju.worker.uniter.remotestate") 22 23 // RemoteStateWatcher collects unit, service, and service config information 24 // from separate state watchers, and updates a Snapshot which is sent on a 25 // channel upon change. 26 type RemoteStateWatcher struct { 27 st State 28 unit Unit 29 service Application 30 relations map[names.RelationTag]*relationUnitsWatcher 31 relationUnitsChanges chan relationUnitsChange 32 storageAttachmentWatchers map[names.StorageTag]*storageAttachmentWatcher 33 storageAttachmentChanges chan storageAttachmentChange 34 leadershipTracker leadership.Tracker 35 updateStatusChannel func() <-chan time.Time 36 commandChannel <-chan string 37 retryHookChannel <-chan struct{} 38 39 catacomb catacomb.Catacomb 40 41 out chan struct{} 42 mu sync.Mutex 43 current Snapshot 44 } 45 46 // WatcherConfig holds configuration parameters for the 47 // remote state watcher. 48 type WatcherConfig struct { 49 State State 50 LeadershipTracker leadership.Tracker 51 UpdateStatusChannel func() <-chan time.Time 52 CommandChannel <-chan string 53 RetryHookChannel <-chan struct{} 54 UnitTag names.UnitTag 55 } 56 57 // NewWatcher returns a RemoteStateWatcher that handles state changes pertaining to the 58 // supplied unit. 59 func NewWatcher(config WatcherConfig) (*RemoteStateWatcher, error) { 60 w := &RemoteStateWatcher{ 61 st: config.State, 62 relations: make(map[names.RelationTag]*relationUnitsWatcher), 63 relationUnitsChanges: make(chan relationUnitsChange), 64 storageAttachmentWatchers: make(map[names.StorageTag]*storageAttachmentWatcher), 65 storageAttachmentChanges: make(chan storageAttachmentChange), 66 leadershipTracker: config.LeadershipTracker, 67 updateStatusChannel: config.UpdateStatusChannel, 68 commandChannel: config.CommandChannel, 69 retryHookChannel: config.RetryHookChannel, 70 // Note: it is important that the out channel be buffered! 71 // The remote state watcher will perform a non-blocking send 72 // on the channel to wake up the observer. It is non-blocking 73 // so that we coalesce events while the observer is busy. 74 out: make(chan struct{}, 1), 75 current: Snapshot{ 76 Relations: make(map[int]RelationSnapshot), 77 Storage: make(map[names.StorageTag]StorageSnapshot), 78 }, 79 } 80 err := catacomb.Invoke(catacomb.Plan{ 81 Site: &w.catacomb, 82 Work: func() error { 83 return w.loop(config.UnitTag) 84 }, 85 }) 86 if err != nil { 87 return nil, errors.Trace(err) 88 } 89 return w, nil 90 } 91 92 // Kill is part of the worker.Worker interface. 93 func (w *RemoteStateWatcher) Kill() { 94 w.catacomb.Kill(nil) 95 } 96 97 // Wait is part of the worker.Worker interface. 98 func (w *RemoteStateWatcher) Wait() error { 99 return w.catacomb.Wait() 100 } 101 102 func (w *RemoteStateWatcher) RemoteStateChanged() <-chan struct{} { 103 return w.out 104 } 105 106 func (w *RemoteStateWatcher) Snapshot() Snapshot { 107 w.mu.Lock() 108 defer w.mu.Unlock() 109 snapshot := w.current 110 snapshot.Relations = make(map[int]RelationSnapshot) 111 for id, relationSnapshot := range w.current.Relations { 112 relationSnapshotCopy := RelationSnapshot{ 113 Life: relationSnapshot.Life, 114 Members: make(map[string]int64), 115 } 116 for name, version := range relationSnapshot.Members { 117 relationSnapshotCopy.Members[name] = version 118 } 119 snapshot.Relations[id] = relationSnapshotCopy 120 } 121 snapshot.Storage = make(map[names.StorageTag]StorageSnapshot) 122 for tag, storageSnapshot := range w.current.Storage { 123 snapshot.Storage[tag] = storageSnapshot 124 } 125 snapshot.Actions = make([]string, len(w.current.Actions)) 126 copy(snapshot.Actions, w.current.Actions) 127 snapshot.Commands = make([]string, len(w.current.Commands)) 128 copy(snapshot.Commands, w.current.Commands) 129 return snapshot 130 } 131 132 func (w *RemoteStateWatcher) ClearResolvedMode() { 133 w.mu.Lock() 134 w.current.ResolvedMode = params.ResolvedNone 135 w.mu.Unlock() 136 } 137 138 func (w *RemoteStateWatcher) CommandCompleted(completed string) { 139 w.mu.Lock() 140 defer w.mu.Unlock() 141 for i, id := range w.current.Commands { 142 if id != completed { 143 continue 144 } 145 w.current.Commands = append( 146 w.current.Commands[:i], 147 w.current.Commands[i+1:]..., 148 ) 149 break 150 } 151 } 152 153 func (w *RemoteStateWatcher) setUp(unitTag names.UnitTag) (err error) { 154 // TODO(dfc) named return value is a time bomb 155 // TODO(axw) move this logic. 156 defer func() { 157 cause := errors.Cause(err) 158 if params.IsCodeNotFoundOrCodeUnauthorized(cause) { 159 err = worker.ErrTerminateAgent 160 } 161 }() 162 if w.unit, err = w.st.Unit(unitTag); err != nil { 163 return errors.Trace(err) 164 } 165 w.service, err = w.unit.Application() 166 if err != nil { 167 return errors.Trace(err) 168 } 169 return nil 170 } 171 172 func (w *RemoteStateWatcher) loop(unitTag names.UnitTag) (err error) { 173 if err := w.setUp(unitTag); err != nil { 174 return errors.Trace(err) 175 } 176 177 var requiredEvents int 178 179 var seenUnitChange bool 180 unitw, err := w.unit.Watch() 181 if err != nil { 182 return errors.Trace(err) 183 } 184 if err := w.catacomb.Add(unitw); err != nil { 185 return errors.Trace(err) 186 } 187 requiredEvents++ 188 189 var seenServiceChange bool 190 servicew, err := w.service.Watch() 191 if err != nil { 192 return errors.Trace(err) 193 } 194 if err := w.catacomb.Add(servicew); err != nil { 195 return errors.Trace(err) 196 } 197 requiredEvents++ 198 199 var seenConfigChange bool 200 configw, err := w.unit.WatchConfigSettings() 201 if err != nil { 202 return errors.Trace(err) 203 } 204 if err := w.catacomb.Add(configw); err != nil { 205 return errors.Trace(err) 206 } 207 requiredEvents++ 208 209 var seenRelationsChange bool 210 relationsw, err := w.service.WatchRelations() 211 if err != nil { 212 return errors.Trace(err) 213 } 214 if err := w.catacomb.Add(relationsw); err != nil { 215 return errors.Trace(err) 216 } 217 requiredEvents++ 218 219 var seenAddressesChange bool 220 addressesw, err := w.unit.WatchAddresses() 221 if err != nil { 222 return errors.Trace(err) 223 } 224 if err := w.catacomb.Add(addressesw); err != nil { 225 return errors.Trace(err) 226 } 227 requiredEvents++ 228 229 var seenStorageChange bool 230 storagew, err := w.unit.WatchStorage() 231 if err != nil { 232 return errors.Trace(err) 233 } 234 if err := w.catacomb.Add(storagew); err != nil { 235 return errors.Trace(err) 236 } 237 requiredEvents++ 238 239 var seenLeaderSettingsChange bool 240 leaderSettingsw, err := w.service.WatchLeadershipSettings() 241 if err != nil { 242 return errors.Trace(err) 243 } 244 if err := w.catacomb.Add(leaderSettingsw); err != nil { 245 return errors.Trace(err) 246 } 247 requiredEvents++ 248 249 var seenActionsChange bool 250 actionsw, err := w.unit.WatchActionNotifications() 251 if err != nil { 252 return errors.Trace(err) 253 } 254 if err := w.catacomb.Add(actionsw); err != nil { 255 return errors.Trace(err) 256 } 257 requiredEvents++ 258 259 var seenLeadershipChange bool 260 // There's no watcher for this per se; we wait on a channel 261 // returned by the leadership tracker. 262 requiredEvents++ 263 264 var eventsObserved int 265 observedEvent := func(flag *bool) { 266 if !*flag { 267 *flag = true 268 eventsObserved++ 269 } 270 } 271 272 // fire will, once the first event for each watcher has 273 // been observed, send a signal on the out channel. 274 fire := func() { 275 if eventsObserved != requiredEvents { 276 return 277 } 278 select { 279 case w.out <- struct{}{}: 280 default: 281 } 282 } 283 284 // Check the initial leadership status, and then we can flip-flop 285 // waiting on leader or minion to trigger the changed event. 286 var waitLeader, waitMinion <-chan struct{} 287 claimLeader := w.leadershipTracker.ClaimLeader() 288 select { 289 case <-w.catacomb.Dying(): 290 return w.catacomb.ErrDying() 291 case <-claimLeader.Ready(): 292 isLeader := claimLeader.Wait() 293 w.leadershipChanged(isLeader) 294 if isLeader { 295 waitMinion = w.leadershipTracker.WaitMinion().Ready() 296 } else { 297 waitLeader = w.leadershipTracker.WaitLeader().Ready() 298 } 299 observedEvent(&seenLeadershipChange) 300 } 301 302 for { 303 select { 304 case <-w.catacomb.Dying(): 305 return w.catacomb.ErrDying() 306 307 case _, ok := <-unitw.Changes(): 308 logger.Debugf("got unit change") 309 if !ok { 310 return errors.New("unit watcher closed") 311 } 312 if err := w.unitChanged(); err != nil { 313 return errors.Trace(err) 314 } 315 observedEvent(&seenUnitChange) 316 317 case _, ok := <-servicew.Changes(): 318 logger.Debugf("got service change") 319 if !ok { 320 return errors.New("service watcher closed") 321 } 322 if err := w.serviceChanged(); err != nil { 323 return errors.Trace(err) 324 } 325 observedEvent(&seenServiceChange) 326 327 case _, ok := <-configw.Changes(): 328 logger.Debugf("got config change: ok=%t", ok) 329 if !ok { 330 return errors.New("config watcher closed") 331 } 332 if err := w.configChanged(); err != nil { 333 return errors.Trace(err) 334 } 335 observedEvent(&seenConfigChange) 336 337 case _, ok := <-addressesw.Changes(): 338 logger.Debugf("got address change: ok=%t", ok) 339 if !ok { 340 return errors.New("addresses watcher closed") 341 } 342 if err := w.addressesChanged(); err != nil { 343 return errors.Trace(err) 344 } 345 observedEvent(&seenAddressesChange) 346 347 case _, ok := <-leaderSettingsw.Changes(): 348 logger.Debugf("got leader settings change: ok=%t", ok) 349 if !ok { 350 return errors.New("leader settings watcher closed") 351 } 352 if err := w.leaderSettingsChanged(); err != nil { 353 return errors.Trace(err) 354 } 355 observedEvent(&seenLeaderSettingsChange) 356 357 case actions, ok := <-actionsw.Changes(): 358 logger.Debugf("got action change: %v ok=%t", actions, ok) 359 if !ok { 360 return errors.New("actions watcher closed") 361 } 362 if err := w.actionsChanged(actions); err != nil { 363 return errors.Trace(err) 364 } 365 observedEvent(&seenActionsChange) 366 367 case keys, ok := <-relationsw.Changes(): 368 logger.Debugf("got relations change: ok=%t", ok) 369 if !ok { 370 return errors.New("relations watcher closed") 371 } 372 if err := w.relationsChanged(keys); err != nil { 373 return errors.Trace(err) 374 } 375 observedEvent(&seenRelationsChange) 376 377 case keys, ok := <-storagew.Changes(): 378 logger.Debugf("got storage change: %v ok=%t", keys, ok) 379 if !ok { 380 return errors.New("storage watcher closed") 381 } 382 if err := w.storageChanged(keys); err != nil { 383 return errors.Trace(err) 384 } 385 observedEvent(&seenStorageChange) 386 387 case <-waitMinion: 388 logger.Debugf("got leadership change: minion") 389 if err := w.leadershipChanged(false); err != nil { 390 return errors.Trace(err) 391 } 392 waitMinion = nil 393 waitLeader = w.leadershipTracker.WaitLeader().Ready() 394 395 case <-waitLeader: 396 logger.Debugf("got leadership change: leader") 397 if err := w.leadershipChanged(true); err != nil { 398 return errors.Trace(err) 399 } 400 waitLeader = nil 401 waitMinion = w.leadershipTracker.WaitMinion().Ready() 402 403 case change := <-w.storageAttachmentChanges: 404 logger.Debugf("storage attachment change %v", change) 405 if err := w.storageAttachmentChanged(change); err != nil { 406 return errors.Trace(err) 407 } 408 409 case change := <-w.relationUnitsChanges: 410 logger.Debugf("got a relation units change: %v", change) 411 if err := w.relationUnitsChanged(change); err != nil { 412 return errors.Trace(err) 413 } 414 415 case <-w.updateStatusChannel(): 416 logger.Debugf("update status timer triggered") 417 if err := w.updateStatusChanged(); err != nil { 418 return errors.Trace(err) 419 } 420 421 case id, ok := <-w.commandChannel: 422 if !ok { 423 return errors.New("commandChannel closed") 424 } 425 logger.Debugf("command enqueued: %v", id) 426 if err := w.commandsChanged(id); err != nil { 427 return err 428 } 429 430 case _, ok := <-w.retryHookChannel: 431 if !ok { 432 return errors.New("retryHookChannel closed") 433 } 434 logger.Debugf("retry hook timer triggered") 435 if err := w.retryHookTimerTriggered(); err != nil { 436 return err 437 } 438 } 439 440 // Something changed. 441 fire() 442 } 443 } 444 445 // updateStatusChanged is called when the update status timer expires. 446 func (w *RemoteStateWatcher) updateStatusChanged() error { 447 w.mu.Lock() 448 w.current.UpdateStatusVersion++ 449 w.mu.Unlock() 450 return nil 451 } 452 453 // commandsChanged is called when a command is enqueued. 454 func (w *RemoteStateWatcher) commandsChanged(id string) error { 455 w.mu.Lock() 456 w.current.Commands = append(w.current.Commands, id) 457 w.mu.Unlock() 458 return nil 459 } 460 461 // retryHookTimerTriggered is called when the retry hook timer expires. 462 func (w *RemoteStateWatcher) retryHookTimerTriggered() error { 463 w.mu.Lock() 464 w.current.RetryHookVersion++ 465 w.mu.Unlock() 466 return nil 467 } 468 469 // unitChanged responds to changes in the unit. 470 func (w *RemoteStateWatcher) unitChanged() error { 471 if err := w.unit.Refresh(); err != nil { 472 return errors.Trace(err) 473 } 474 resolved, err := w.unit.Resolved() 475 if err != nil { 476 return errors.Trace(err) 477 } 478 w.mu.Lock() 479 defer w.mu.Unlock() 480 w.current.Life = w.unit.Life() 481 w.current.ResolvedMode = resolved 482 return nil 483 } 484 485 // serviceChanged responds to changes in the service. 486 func (w *RemoteStateWatcher) serviceChanged() error { 487 if err := w.service.Refresh(); err != nil { 488 return errors.Trace(err) 489 } 490 url, force, err := w.service.CharmURL() 491 if err != nil { 492 return errors.Trace(err) 493 } 494 ver, err := w.service.CharmModifiedVersion() 495 if err != nil { 496 return errors.Trace(err) 497 } 498 w.mu.Lock() 499 w.current.CharmURL = url 500 w.current.ForceCharmUpgrade = force 501 w.current.CharmModifiedVersion = ver 502 w.mu.Unlock() 503 return nil 504 } 505 506 func (w *RemoteStateWatcher) configChanged() error { 507 w.mu.Lock() 508 w.current.ConfigVersion++ 509 w.mu.Unlock() 510 return nil 511 } 512 513 func (w *RemoteStateWatcher) addressesChanged() error { 514 w.mu.Lock() 515 w.current.ConfigVersion++ 516 w.mu.Unlock() 517 return nil 518 } 519 520 func (w *RemoteStateWatcher) leaderSettingsChanged() error { 521 w.mu.Lock() 522 w.current.LeaderSettingsVersion++ 523 w.mu.Unlock() 524 return nil 525 } 526 527 func (w *RemoteStateWatcher) leadershipChanged(isLeader bool) error { 528 w.mu.Lock() 529 w.current.Leader = isLeader 530 w.mu.Unlock() 531 return nil 532 } 533 534 // relationsChanged responds to service relation changes. 535 func (w *RemoteStateWatcher) relationsChanged(keys []string) error { 536 w.mu.Lock() 537 defer w.mu.Unlock() 538 for _, key := range keys { 539 relationTag := names.NewRelationTag(key) 540 rel, err := w.st.Relation(relationTag) 541 if params.IsCodeNotFoundOrCodeUnauthorized(err) { 542 // If it's actually gone, this unit cannot have entered 543 // scope, and therefore never needs to know about it. 544 if ruw, ok := w.relations[relationTag]; ok { 545 worker.Stop(ruw) 546 delete(w.relations, relationTag) 547 delete(w.current.Relations, ruw.relationId) 548 } 549 } else if err != nil { 550 return errors.Trace(err) 551 } else { 552 if _, ok := w.relations[relationTag]; ok { 553 relationSnapshot := w.current.Relations[rel.Id()] 554 relationSnapshot.Life = rel.Life() 555 w.current.Relations[rel.Id()] = relationSnapshot 556 continue 557 } 558 ruw, err := w.st.WatchRelationUnits(relationTag, w.unit.Tag()) 559 if err != nil { 560 return errors.Trace(err) 561 } 562 // Because of the delay before handing off responsibility to 563 // newRelationUnitsWatcher below, add to our own catacomb to 564 // ensure errors get picked up if they happen. 565 if err := w.catacomb.Add(ruw); err != nil { 566 return errors.Trace(err) 567 } 568 if err := w.watchRelationUnits(rel, relationTag, ruw); err != nil { 569 return errors.Trace(err) 570 } 571 } 572 } 573 return nil 574 } 575 576 // watchRelationUnits starts watching the relation units for the given 577 // relation, waits for its first event, and records the information in 578 // the current snapshot. 579 func (w *RemoteStateWatcher) watchRelationUnits( 580 rel Relation, relationTag names.RelationTag, ruw watcher.RelationUnitsWatcher, 581 ) error { 582 relationSnapshot := RelationSnapshot{ 583 Life: rel.Life(), 584 Members: make(map[string]int64), 585 } 586 select { 587 case <-w.catacomb.Dying(): 588 return w.catacomb.ErrDying() 589 case change, ok := <-ruw.Changes(): 590 if !ok { 591 return errors.New("relation units watcher closed") 592 } 593 for unit, settings := range change.Changed { 594 relationSnapshot.Members[unit] = settings.Version 595 } 596 } 597 innerRUW, err := newRelationUnitsWatcher(rel.Id(), ruw, w.relationUnitsChanges) 598 if err != nil { 599 return errors.Trace(err) 600 } 601 if err := w.catacomb.Add(innerRUW); err != nil { 602 return errors.Trace(err) 603 } 604 w.current.Relations[rel.Id()] = relationSnapshot 605 w.relations[relationTag] = innerRUW 606 return nil 607 } 608 609 // relationUnitsChanged responds to relation units changes. 610 func (w *RemoteStateWatcher) relationUnitsChanged(change relationUnitsChange) error { 611 w.mu.Lock() 612 defer w.mu.Unlock() 613 snapshot, ok := w.current.Relations[change.relationId] 614 if !ok { 615 return nil 616 } 617 for unit, settings := range change.Changed { 618 snapshot.Members[unit] = settings.Version 619 } 620 for _, unit := range change.Departed { 621 delete(snapshot.Members, unit) 622 } 623 return nil 624 } 625 626 // storageAttachmentChanged responds to storage attachment changes. 627 func (w *RemoteStateWatcher) storageAttachmentChanged(change storageAttachmentChange) error { 628 w.mu.Lock() 629 w.current.Storage[change.Tag] = change.Snapshot 630 w.mu.Unlock() 631 return nil 632 } 633 634 func (w *RemoteStateWatcher) actionsChanged(actions []string) error { 635 w.mu.Lock() 636 defer w.mu.Unlock() 637 w.current.Actions = append(w.current.Actions, actions...) 638 return nil 639 } 640 641 // storageChanged responds to unit storage changes. 642 func (w *RemoteStateWatcher) storageChanged(keys []string) error { 643 tags := make([]names.StorageTag, len(keys)) 644 for i, key := range keys { 645 tags[i] = names.NewStorageTag(key) 646 } 647 ids := make([]params.StorageAttachmentId, len(keys)) 648 for i, tag := range tags { 649 ids[i] = params.StorageAttachmentId{ 650 StorageTag: tag.String(), 651 UnitTag: w.unit.Tag().String(), 652 } 653 } 654 results, err := w.st.StorageAttachmentLife(ids) 655 if err != nil { 656 return errors.Trace(err) 657 } 658 659 w.mu.Lock() 660 defer w.mu.Unlock() 661 662 for i, result := range results { 663 tag := tags[i] 664 if result.Error == nil { 665 if storageSnapshot, ok := w.current.Storage[tag]; ok { 666 // We've previously started a watcher for this storage 667 // attachment, so all we needed to do was update the 668 // lifecycle state. 669 storageSnapshot.Life = result.Life 670 w.current.Storage[tag] = storageSnapshot 671 continue 672 } 673 // We haven't seen this storage attachment before, so start 674 // a watcher now; add it to our catacomb in case of mishap; 675 // and wait for the initial event. 676 saw, err := w.st.WatchStorageAttachment(tag, w.unit.Tag()) 677 if err != nil { 678 return errors.Annotate(err, "watching storage attachment") 679 } 680 if err := w.catacomb.Add(saw); err != nil { 681 return errors.Trace(err) 682 } 683 if err := w.watchStorageAttachment(tag, result.Life, saw); err != nil { 684 return errors.Trace(err) 685 } 686 } else if params.IsCodeNotFound(result.Error) { 687 if watcher, ok := w.storageAttachmentWatchers[tag]; ok { 688 // already under catacomb management, any error tracked already 689 worker.Stop(watcher) 690 delete(w.storageAttachmentWatchers, tag) 691 } 692 delete(w.current.Storage, tag) 693 } else { 694 return errors.Annotatef( 695 result.Error, "getting life of %s attachment", 696 names.ReadableString(tag), 697 ) 698 } 699 } 700 return nil 701 } 702 703 // watchStorageAttachment starts watching the storage attachment with 704 // the specified storage tag, waits for its first event, and records 705 // the information in the current snapshot. 706 func (w *RemoteStateWatcher) watchStorageAttachment( 707 tag names.StorageTag, 708 life params.Life, 709 saw watcher.NotifyWatcher, 710 ) error { 711 var storageSnapshot StorageSnapshot 712 select { 713 case <-w.catacomb.Dying(): 714 return w.catacomb.ErrDying() 715 case _, ok := <-saw.Changes(): 716 if !ok { 717 return errors.New("storage attachment watcher closed") 718 } 719 var err error 720 storageSnapshot, err = getStorageSnapshot(w.st, tag, w.unit.Tag()) 721 if params.IsCodeNotProvisioned(err) { 722 // If the storage is unprovisioned, we still want to 723 // record the attachment, but we'll mark it as 724 // unattached. This allows the uniter to wait for 725 // pending storage attachments to be provisioned. 726 storageSnapshot = StorageSnapshot{Life: life} 727 } else if err != nil { 728 return errors.Annotatef(err, "processing initial storage attachment change") 729 } 730 } 731 innerSAW, err := newStorageAttachmentWatcher( 732 w.st, saw, w.unit.Tag(), tag, w.storageAttachmentChanges, 733 ) 734 if err != nil { 735 return errors.Trace(err) 736 } 737 w.current.Storage[tag] = storageSnapshot 738 w.storageAttachmentWatchers[tag] = innerSAW 739 return nil 740 }