github.com/makyo/juju@v0.0.0-20160425123129-2608902037e9/worker/uniter/remotestate/watcher.go (about) 1 // Copyright 2012-2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package remotestate 5 6 import ( 7 "sync" 8 "time" 9 10 "github.com/juju/errors" 11 "github.com/juju/loggo" 12 "github.com/juju/names" 13 14 "github.com/juju/juju/apiserver/params" 15 "github.com/juju/juju/core/leadership" 16 "github.com/juju/juju/watcher" 17 "github.com/juju/juju/worker" 18 "github.com/juju/juju/worker/catacomb" 19 ) 20 21 var logger = loggo.GetLogger("juju.worker.uniter.remotestate") 22 23 // RemoteStateWatcher collects unit, service, and service config information 24 // from separate state watchers, and updates a Snapshot which is sent on a 25 // channel upon change. 26 type RemoteStateWatcher struct { 27 st State 28 unit Unit 29 service Service 30 relations map[names.RelationTag]*relationUnitsWatcher 31 relationUnitsChanges chan relationUnitsChange 32 storageAttachmentWatchers map[names.StorageTag]*storageAttachmentWatcher 33 storageAttachmentChanges chan storageAttachmentChange 34 leadershipTracker leadership.Tracker 35 updateStatusChannel func() <-chan time.Time 36 commandChannel <-chan string 37 retryHookChannel <-chan struct{} 38 39 catacomb catacomb.Catacomb 40 41 out chan struct{} 42 mu sync.Mutex 43 current Snapshot 44 } 45 46 // WatcherConfig holds configuration parameters for the 47 // remote state watcher. 48 type WatcherConfig struct { 49 State State 50 LeadershipTracker leadership.Tracker 51 UpdateStatusChannel func() <-chan time.Time 52 CommandChannel <-chan string 53 RetryHookChannel <-chan struct{} 54 UnitTag names.UnitTag 55 } 56 57 // NewWatcher returns a RemoteStateWatcher that handles state changes pertaining to the 58 // supplied unit. 59 func NewWatcher(config WatcherConfig) (*RemoteStateWatcher, error) { 60 w := &RemoteStateWatcher{ 61 st: config.State, 62 relations: make(map[names.RelationTag]*relationUnitsWatcher), 63 relationUnitsChanges: make(chan relationUnitsChange), 64 storageAttachmentWatchers: make(map[names.StorageTag]*storageAttachmentWatcher), 65 storageAttachmentChanges: make(chan storageAttachmentChange), 66 leadershipTracker: config.LeadershipTracker, 67 updateStatusChannel: config.UpdateStatusChannel, 68 commandChannel: config.CommandChannel, 69 retryHookChannel: config.RetryHookChannel, 70 // Note: it is important that the out channel be buffered! 71 // The remote state watcher will perform a non-blocking send 72 // on the channel to wake up the observer. It is non-blocking 73 // so that we coalesce events while the observer is busy. 74 out: make(chan struct{}, 1), 75 current: Snapshot{ 76 Relations: make(map[int]RelationSnapshot), 77 Storage: make(map[names.StorageTag]StorageSnapshot), 78 }, 79 } 80 err := catacomb.Invoke(catacomb.Plan{ 81 Site: &w.catacomb, 82 Work: func() error { 83 return w.loop(config.UnitTag) 84 }, 85 }) 86 if err != nil { 87 return nil, errors.Trace(err) 88 } 89 return w, nil 90 } 91 92 // Kill is part of the worker.Worker interface. 93 func (w *RemoteStateWatcher) Kill() { 94 w.catacomb.Kill(nil) 95 } 96 97 // Wait is part of the worker.Worker interface. 98 func (w *RemoteStateWatcher) Wait() error { 99 return w.catacomb.Wait() 100 } 101 102 func (w *RemoteStateWatcher) RemoteStateChanged() <-chan struct{} { 103 return w.out 104 } 105 106 func (w *RemoteStateWatcher) Snapshot() Snapshot { 107 w.mu.Lock() 108 defer w.mu.Unlock() 109 snapshot := w.current 110 snapshot.Relations = make(map[int]RelationSnapshot) 111 for id, relationSnapshot := range w.current.Relations { 112 snapshot.Relations[id] = relationSnapshot 113 } 114 snapshot.Storage = make(map[names.StorageTag]StorageSnapshot) 115 for tag, storageSnapshot := range w.current.Storage { 116 snapshot.Storage[tag] = storageSnapshot 117 } 118 snapshot.Actions = make([]string, len(w.current.Actions)) 119 copy(snapshot.Actions, w.current.Actions) 120 snapshot.Commands = make([]string, len(w.current.Commands)) 121 copy(snapshot.Commands, w.current.Commands) 122 return snapshot 123 } 124 125 func (w *RemoteStateWatcher) ClearResolvedMode() { 126 w.mu.Lock() 127 w.current.ResolvedMode = params.ResolvedNone 128 w.mu.Unlock() 129 } 130 131 func (w *RemoteStateWatcher) CommandCompleted(completed string) { 132 w.mu.Lock() 133 defer w.mu.Unlock() 134 for i, id := range w.current.Commands { 135 if id != completed { 136 continue 137 } 138 w.current.Commands = append( 139 w.current.Commands[:i], 140 w.current.Commands[i+1:]..., 141 ) 142 break 143 } 144 } 145 146 func (w *RemoteStateWatcher) setUp(unitTag names.UnitTag) (err error) { 147 // TODO(dfc) named return value is a time bomb 148 // TODO(axw) move this logic. 149 defer func() { 150 cause := errors.Cause(err) 151 if params.IsCodeNotFoundOrCodeUnauthorized(cause) { 152 err = worker.ErrTerminateAgent 153 } 154 }() 155 if w.unit, err = w.st.Unit(unitTag); err != nil { 156 return errors.Trace(err) 157 } 158 w.service, err = w.unit.Service() 159 if err != nil { 160 return errors.Trace(err) 161 } 162 return nil 163 } 164 165 func (w *RemoteStateWatcher) loop(unitTag names.UnitTag) (err error) { 166 if err := w.setUp(unitTag); err != nil { 167 return errors.Trace(err) 168 } 169 170 var requiredEvents int 171 172 var seenUnitChange bool 173 unitw, err := w.unit.Watch() 174 if err != nil { 175 return errors.Trace(err) 176 } 177 if err := w.catacomb.Add(unitw); err != nil { 178 return errors.Trace(err) 179 } 180 requiredEvents++ 181 182 var seenServiceChange bool 183 servicew, err := w.service.Watch() 184 if err != nil { 185 return errors.Trace(err) 186 } 187 if err := w.catacomb.Add(servicew); err != nil { 188 return errors.Trace(err) 189 } 190 requiredEvents++ 191 192 var seenConfigChange bool 193 configw, err := w.unit.WatchConfigSettings() 194 if err != nil { 195 return errors.Trace(err) 196 } 197 if err := w.catacomb.Add(configw); err != nil { 198 return errors.Trace(err) 199 } 200 requiredEvents++ 201 202 var seenRelationsChange bool 203 relationsw, err := w.service.WatchRelations() 204 if err != nil { 205 return errors.Trace(err) 206 } 207 if err := w.catacomb.Add(relationsw); err != nil { 208 return errors.Trace(err) 209 } 210 requiredEvents++ 211 212 var seenAddressesChange bool 213 addressesw, err := w.unit.WatchAddresses() 214 if err != nil { 215 return errors.Trace(err) 216 } 217 if err := w.catacomb.Add(addressesw); err != nil { 218 return errors.Trace(err) 219 } 220 requiredEvents++ 221 222 var seenStorageChange bool 223 storagew, err := w.unit.WatchStorage() 224 if err != nil { 225 return errors.Trace(err) 226 } 227 if err := w.catacomb.Add(storagew); err != nil { 228 return errors.Trace(err) 229 } 230 requiredEvents++ 231 232 var seenLeaderSettingsChange bool 233 leaderSettingsw, err := w.service.WatchLeadershipSettings() 234 if err != nil { 235 return errors.Trace(err) 236 } 237 if err := w.catacomb.Add(leaderSettingsw); err != nil { 238 return errors.Trace(err) 239 } 240 requiredEvents++ 241 242 var seenActionsChange bool 243 actionsw, err := w.unit.WatchActionNotifications() 244 if err != nil { 245 return errors.Trace(err) 246 } 247 if err := w.catacomb.Add(actionsw); err != nil { 248 return errors.Trace(err) 249 } 250 requiredEvents++ 251 252 var seenLeadershipChange bool 253 // There's no watcher for this per se; we wait on a channel 254 // returned by the leadership tracker. 255 requiredEvents++ 256 257 var eventsObserved int 258 observedEvent := func(flag *bool) { 259 if !*flag { 260 *flag = true 261 eventsObserved++ 262 } 263 } 264 265 // fire will, once the first event for each watcher has 266 // been observed, send a signal on the out channel. 267 fire := func() { 268 if eventsObserved != requiredEvents { 269 return 270 } 271 select { 272 case w.out <- struct{}{}: 273 default: 274 } 275 } 276 277 // Check the initial leadership status, and then we can flip-flop 278 // waiting on leader or minion to trigger the changed event. 279 var waitLeader, waitMinion <-chan struct{} 280 claimLeader := w.leadershipTracker.ClaimLeader() 281 select { 282 case <-w.catacomb.Dying(): 283 return w.catacomb.ErrDying() 284 case <-claimLeader.Ready(): 285 isLeader := claimLeader.Wait() 286 w.leadershipChanged(isLeader) 287 if isLeader { 288 waitMinion = w.leadershipTracker.WaitMinion().Ready() 289 } else { 290 waitLeader = w.leadershipTracker.WaitLeader().Ready() 291 } 292 observedEvent(&seenLeadershipChange) 293 } 294 295 for { 296 select { 297 case <-w.catacomb.Dying(): 298 return w.catacomb.ErrDying() 299 300 case _, ok := <-unitw.Changes(): 301 logger.Debugf("got unit change") 302 if !ok { 303 return errors.New("unit watcher closed") 304 } 305 if err := w.unitChanged(); err != nil { 306 return errors.Trace(err) 307 } 308 observedEvent(&seenUnitChange) 309 310 case _, ok := <-servicew.Changes(): 311 logger.Debugf("got service change") 312 if !ok { 313 return errors.New("service watcher closed") 314 } 315 if err := w.serviceChanged(); err != nil { 316 return errors.Trace(err) 317 } 318 observedEvent(&seenServiceChange) 319 320 case _, ok := <-configw.Changes(): 321 logger.Debugf("got config change: ok=%t", ok) 322 if !ok { 323 return errors.New("config watcher closed") 324 } 325 if err := w.configChanged(); err != nil { 326 return errors.Trace(err) 327 } 328 observedEvent(&seenConfigChange) 329 330 case _, ok := <-addressesw.Changes(): 331 logger.Debugf("got address change: ok=%t", ok) 332 if !ok { 333 return errors.New("addresses watcher closed") 334 } 335 if err := w.addressesChanged(); err != nil { 336 return errors.Trace(err) 337 } 338 observedEvent(&seenAddressesChange) 339 340 case _, ok := <-leaderSettingsw.Changes(): 341 logger.Debugf("got leader settings change: ok=%t", ok) 342 if !ok { 343 return errors.New("leader settings watcher closed") 344 } 345 if err := w.leaderSettingsChanged(); err != nil { 346 return errors.Trace(err) 347 } 348 observedEvent(&seenLeaderSettingsChange) 349 350 case actions, ok := <-actionsw.Changes(): 351 logger.Debugf("got action change: %v ok=%t", actions, ok) 352 if !ok { 353 return errors.New("actions watcher closed") 354 } 355 if err := w.actionsChanged(actions); err != nil { 356 return errors.Trace(err) 357 } 358 observedEvent(&seenActionsChange) 359 360 case keys, ok := <-relationsw.Changes(): 361 logger.Debugf("got relations change: ok=%t", ok) 362 if !ok { 363 return errors.New("relations watcher closed") 364 } 365 if err := w.relationsChanged(keys); err != nil { 366 return errors.Trace(err) 367 } 368 observedEvent(&seenRelationsChange) 369 370 case keys, ok := <-storagew.Changes(): 371 logger.Debugf("got storage change: %v ok=%t", keys, ok) 372 if !ok { 373 return errors.New("storage watcher closed") 374 } 375 if err := w.storageChanged(keys); err != nil { 376 return errors.Trace(err) 377 } 378 observedEvent(&seenStorageChange) 379 380 case <-waitMinion: 381 logger.Debugf("got leadership change: minion") 382 if err := w.leadershipChanged(false); err != nil { 383 return errors.Trace(err) 384 } 385 waitMinion = nil 386 waitLeader = w.leadershipTracker.WaitLeader().Ready() 387 388 case <-waitLeader: 389 logger.Debugf("got leadership change: leader") 390 if err := w.leadershipChanged(true); err != nil { 391 return errors.Trace(err) 392 } 393 waitLeader = nil 394 waitMinion = w.leadershipTracker.WaitMinion().Ready() 395 396 case change := <-w.storageAttachmentChanges: 397 logger.Debugf("storage attachment change %v", change) 398 if err := w.storageAttachmentChanged(change); err != nil { 399 return errors.Trace(err) 400 } 401 402 case change := <-w.relationUnitsChanges: 403 logger.Debugf("got a relation units change: %v", change) 404 if err := w.relationUnitsChanged(change); err != nil { 405 return errors.Trace(err) 406 } 407 408 case <-w.updateStatusChannel(): 409 logger.Debugf("update status timer triggered") 410 if err := w.updateStatusChanged(); err != nil { 411 return errors.Trace(err) 412 } 413 414 case id := <-w.commandChannel: 415 logger.Debugf("command enqueued: %v", id) 416 if err := w.commandsChanged(id); err != nil { 417 return err 418 } 419 420 case <-w.retryHookChannel: 421 logger.Debugf("retry hook timer triggered") 422 if err := w.retryHookTimerTriggered(); err != nil { 423 return err 424 } 425 } 426 427 // Something changed. 428 fire() 429 } 430 } 431 432 // updateStatusChanged is called when the update status timer expires. 433 func (w *RemoteStateWatcher) updateStatusChanged() error { 434 w.mu.Lock() 435 w.current.UpdateStatusVersion++ 436 w.mu.Unlock() 437 return nil 438 } 439 440 // commandsChanged is called when a command is enqueued. 441 func (w *RemoteStateWatcher) commandsChanged(id string) error { 442 w.mu.Lock() 443 w.current.Commands = append(w.current.Commands, id) 444 w.mu.Unlock() 445 return nil 446 } 447 448 // retryHookTimerTriggered is called when the retry hook timer expires. 449 func (w *RemoteStateWatcher) retryHookTimerTriggered() error { 450 w.mu.Lock() 451 w.current.RetryHookVersion++ 452 w.mu.Unlock() 453 return nil 454 } 455 456 // unitChanged responds to changes in the unit. 457 func (w *RemoteStateWatcher) unitChanged() error { 458 if err := w.unit.Refresh(); err != nil { 459 return errors.Trace(err) 460 } 461 resolved, err := w.unit.Resolved() 462 if err != nil { 463 return errors.Trace(err) 464 } 465 w.mu.Lock() 466 defer w.mu.Unlock() 467 w.current.Life = w.unit.Life() 468 w.current.ResolvedMode = resolved 469 return nil 470 } 471 472 // serviceChanged responds to changes in the service. 473 func (w *RemoteStateWatcher) serviceChanged() error { 474 if err := w.service.Refresh(); err != nil { 475 return errors.Trace(err) 476 } 477 url, force, err := w.service.CharmURL() 478 if err != nil { 479 return errors.Trace(err) 480 } 481 ver, err := w.service.CharmModifiedVersion() 482 if err != nil { 483 return errors.Trace(err) 484 } 485 w.mu.Lock() 486 w.current.CharmURL = url 487 w.current.ForceCharmUpgrade = force 488 w.current.CharmModifiedVersion = ver 489 w.mu.Unlock() 490 return nil 491 } 492 493 func (w *RemoteStateWatcher) configChanged() error { 494 w.mu.Lock() 495 w.current.ConfigVersion++ 496 w.mu.Unlock() 497 return nil 498 } 499 500 func (w *RemoteStateWatcher) addressesChanged() error { 501 w.mu.Lock() 502 w.current.ConfigVersion++ 503 w.mu.Unlock() 504 return nil 505 } 506 507 func (w *RemoteStateWatcher) leaderSettingsChanged() error { 508 w.mu.Lock() 509 w.current.LeaderSettingsVersion++ 510 w.mu.Unlock() 511 return nil 512 } 513 514 func (w *RemoteStateWatcher) leadershipChanged(isLeader bool) error { 515 w.mu.Lock() 516 w.current.Leader = isLeader 517 w.mu.Unlock() 518 return nil 519 } 520 521 // relationsChanged responds to service relation changes. 522 func (w *RemoteStateWatcher) relationsChanged(keys []string) error { 523 w.mu.Lock() 524 defer w.mu.Unlock() 525 for _, key := range keys { 526 relationTag := names.NewRelationTag(key) 527 rel, err := w.st.Relation(relationTag) 528 if params.IsCodeNotFoundOrCodeUnauthorized(err) { 529 // If it's actually gone, this unit cannot have entered 530 // scope, and therefore never needs to know about it. 531 if ruw, ok := w.relations[relationTag]; ok { 532 worker.Stop(ruw) 533 delete(w.relations, relationTag) 534 delete(w.current.Relations, ruw.relationId) 535 } 536 } else if err != nil { 537 return errors.Trace(err) 538 } else { 539 if _, ok := w.relations[relationTag]; ok { 540 relationSnapshot := w.current.Relations[rel.Id()] 541 relationSnapshot.Life = rel.Life() 542 w.current.Relations[rel.Id()] = relationSnapshot 543 continue 544 } 545 ruw, err := w.st.WatchRelationUnits(relationTag, w.unit.Tag()) 546 if err != nil { 547 return errors.Trace(err) 548 } 549 // Because of the delay before handing off responsibility to 550 // newRelationUnitsWatcher below, add to our own catacomb to 551 // ensure errors get picked up if they happen. 552 if err := w.catacomb.Add(ruw); err != nil { 553 return errors.Trace(err) 554 } 555 if err := w.watchRelationUnits(rel, relationTag, ruw); err != nil { 556 return errors.Trace(err) 557 } 558 } 559 } 560 return nil 561 } 562 563 // watchRelationUnits starts watching the relation units for the given 564 // relation, waits for its first event, and records the information in 565 // the current snapshot. 566 func (w *RemoteStateWatcher) watchRelationUnits( 567 rel Relation, relationTag names.RelationTag, ruw watcher.RelationUnitsWatcher, 568 ) error { 569 relationSnapshot := RelationSnapshot{ 570 Life: rel.Life(), 571 Members: make(map[string]int64), 572 } 573 select { 574 case <-w.catacomb.Dying(): 575 return w.catacomb.ErrDying() 576 case change, ok := <-ruw.Changes(): 577 if !ok { 578 return errors.New("relation units watcher closed") 579 } 580 for unit, settings := range change.Changed { 581 relationSnapshot.Members[unit] = settings.Version 582 } 583 } 584 innerRUW, err := newRelationUnitsWatcher(rel.Id(), ruw, w.relationUnitsChanges) 585 if err != nil { 586 return errors.Trace(err) 587 } 588 if err := w.catacomb.Add(innerRUW); err != nil { 589 return errors.Trace(err) 590 } 591 w.current.Relations[rel.Id()] = relationSnapshot 592 w.relations[relationTag] = innerRUW 593 return nil 594 } 595 596 // relationUnitsChanged responds to relation units changes. 597 func (w *RemoteStateWatcher) relationUnitsChanged(change relationUnitsChange) error { 598 w.mu.Lock() 599 defer w.mu.Unlock() 600 snapshot, ok := w.current.Relations[change.relationId] 601 if !ok { 602 return nil 603 } 604 for unit, settings := range change.Changed { 605 snapshot.Members[unit] = settings.Version 606 } 607 for _, unit := range change.Departed { 608 delete(snapshot.Members, unit) 609 } 610 return nil 611 } 612 613 // storageAttachmentChanged responds to storage attachment changes. 614 func (w *RemoteStateWatcher) storageAttachmentChanged(change storageAttachmentChange) error { 615 w.mu.Lock() 616 w.current.Storage[change.Tag] = change.Snapshot 617 w.mu.Unlock() 618 return nil 619 } 620 621 func (w *RemoteStateWatcher) actionsChanged(actions []string) error { 622 w.mu.Lock() 623 defer w.mu.Unlock() 624 w.current.Actions = append(w.current.Actions, actions...) 625 return nil 626 } 627 628 // storageChanged responds to unit storage changes. 629 func (w *RemoteStateWatcher) storageChanged(keys []string) error { 630 tags := make([]names.StorageTag, len(keys)) 631 for i, key := range keys { 632 tags[i] = names.NewStorageTag(key) 633 } 634 ids := make([]params.StorageAttachmentId, len(keys)) 635 for i, tag := range tags { 636 ids[i] = params.StorageAttachmentId{ 637 StorageTag: tag.String(), 638 UnitTag: w.unit.Tag().String(), 639 } 640 } 641 results, err := w.st.StorageAttachmentLife(ids) 642 if err != nil { 643 return errors.Trace(err) 644 } 645 646 w.mu.Lock() 647 defer w.mu.Unlock() 648 649 for i, result := range results { 650 tag := tags[i] 651 if result.Error == nil { 652 if storageSnapshot, ok := w.current.Storage[tag]; ok { 653 // We've previously started a watcher for this storage 654 // attachment, so all we needed to do was update the 655 // lifecycle state. 656 storageSnapshot.Life = result.Life 657 w.current.Storage[tag] = storageSnapshot 658 continue 659 } 660 // We haven't seen this storage attachment before, so start 661 // a watcher now; add it to our catacomb in case of mishap; 662 // and wait for the initial event. 663 saw, err := w.st.WatchStorageAttachment(tag, w.unit.Tag()) 664 if err != nil { 665 return errors.Annotate(err, "watching storage attachment") 666 } 667 if err := w.catacomb.Add(saw); err != nil { 668 return errors.Trace(err) 669 } 670 if err := w.watchStorageAttachment(tag, result.Life, saw); err != nil { 671 return errors.Trace(err) 672 } 673 } else if params.IsCodeNotFound(result.Error) { 674 if watcher, ok := w.storageAttachmentWatchers[tag]; ok { 675 // already under catacomb management, any error tracked already 676 worker.Stop(watcher) 677 delete(w.storageAttachmentWatchers, tag) 678 } 679 delete(w.current.Storage, tag) 680 } else { 681 return errors.Annotatef( 682 result.Error, "getting life of %s attachment", 683 names.ReadableString(tag), 684 ) 685 } 686 } 687 return nil 688 } 689 690 // watchStorageAttachment starts watching the storage attachment with 691 // the specified storage tag, waits for its first event, and records 692 // the information in the current snapshot. 693 func (w *RemoteStateWatcher) watchStorageAttachment( 694 tag names.StorageTag, 695 life params.Life, 696 saw watcher.NotifyWatcher, 697 ) error { 698 var storageSnapshot StorageSnapshot 699 select { 700 case <-w.catacomb.Dying(): 701 return w.catacomb.ErrDying() 702 case _, ok := <-saw.Changes(): 703 if !ok { 704 return errors.New("storage attachment watcher closed") 705 } 706 var err error 707 storageSnapshot, err = getStorageSnapshot(w.st, tag, w.unit.Tag()) 708 if params.IsCodeNotProvisioned(err) { 709 // If the storage is unprovisioned, we still want to 710 // record the attachment, but we'll mark it as 711 // unattached. This allows the uniter to wait for 712 // pending storage attachments to be provisioned. 713 storageSnapshot = StorageSnapshot{Life: life} 714 } else if err != nil { 715 return errors.Annotatef(err, "processing initial storage attachment change") 716 } 717 } 718 innerSAW, err := newStorageAttachmentWatcher( 719 w.st, saw, w.unit.Tag(), tag, w.storageAttachmentChanges, 720 ) 721 if err != nil { 722 return errors.Trace(err) 723 } 724 w.current.Storage[tag] = storageSnapshot 725 w.storageAttachmentWatchers[tag] = innerSAW 726 return nil 727 }