github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/state/unit.go

// Copyright 2012-2015 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package state

import (
	"fmt"
	"reflect"
	"sort"
	"time"

	"github.com/juju/charm/v12"
	"github.com/juju/collections/set"
	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/mgo/v3"
	"github.com/juju/mgo/v3/bson"
	"github.com/juju/mgo/v3/txn"
	"github.com/juju/names/v5"
	jujutxn "github.com/juju/txn/v3"
	"github.com/juju/utils/v3"
	"github.com/juju/version/v2"

	"github.com/juju/juju/core/actions"
	"github.com/juju/juju/core/constraints"
	"github.com/juju/juju/core/instance"
	"github.com/juju/juju/core/model"
	"github.com/juju/juju/core/network"
	"github.com/juju/juju/core/status"
	mgoutils "github.com/juju/juju/mongo/utils"
	stateerrors "github.com/juju/juju/state/errors"
	"github.com/juju/juju/tools"
)

var unitLogger = loggo.GetLogger("juju.state.unit")

// AssignmentPolicy controls what machine a unit will be assigned to.
type AssignmentPolicy string

const (
	// AssignLocal indicates that all application units should be assigned
	// to machine 0.
	AssignLocal AssignmentPolicy = "local"

	// AssignClean indicates that every application unit should be assigned
	// to a machine which has never previously hosted any units, and that
	// new machines should be launched if required.
	AssignClean AssignmentPolicy = "clean"

	// AssignCleanEmpty indicates that every application unit should be assigned
	// to a machine which has never previously hosted any units, and which is not
	// currently hosting any containers, and that new machines should be launched
	// if required.
	AssignCleanEmpty AssignmentPolicy = "clean-empty"

	// AssignNew indicates that every application unit should be assigned to a new
	// dedicated machine. A new machine will be launched for each new unit.
	AssignNew AssignmentPolicy = "new"
)

// ResolvedMode describes the way state transition errors
// are resolved.
type ResolvedMode string

// These are the available ResolvedMode values.
const (
	ResolvedNone       ResolvedMode = ""
	ResolvedRetryHooks ResolvedMode = "retry-hooks"
	ResolvedNoHooks    ResolvedMode = "no-hooks"
)

// unitDoc represents the internal state of a unit in MongoDB.
// Note the correspondence with UnitInfo in core/multiwatcher.
type unitDoc struct {
	DocID                  string `bson:"_id"`
	Name                   string `bson:"name"`
	ModelUUID              string `bson:"model-uuid"`
	Base                   Base   `bson:"base"`
	Application            string
	CharmURL               *string
	Principal              string
	Subordinates           []string
	StorageAttachmentCount int `bson:"storageattachmentcount"`
	MachineId              string
	Resolved               ResolvedMode
	Tools                  *tools.Tools `bson:",omitempty"`
	Life                   Life
	PasswordHash           string
}

// Unit represents the state of an application unit.
type Unit struct {
	st  *State
	doc unitDoc

	// Cache the model type, as it is immutable and is referenced
	// throughout the lifecycle of the unit.
	modelType ModelType
}

func newUnit(st *State, modelType ModelType, udoc *unitDoc) *Unit {
	unit := &Unit{
		st:        st,
		doc:       *udoc,
		modelType: modelType,
	}
	return unit
}
// ContainerInfo returns information about the container hosting this unit.
// This is only used for CAAS models.
func (u *Unit) ContainerInfo() (CloudContainer, error) {
	doc, err := u.cloudContainer()
	if err != nil {
		return nil, errors.Trace(err)
	}
	return &cloudContainer{doc: *doc, unitName: u.Name()}, nil
}

// ShouldBeAssigned returns whether the unit should be assigned to a machine.
// IAAS models require units to be assigned.
func (u *Unit) ShouldBeAssigned() bool {
	return !u.isCaas()
}

func (u *Unit) isCaas() bool {
	return u.modelType == ModelTypeCAAS
}

// IsSidecar returns true when using new CAAS charms in sidecar mode.
func (u *Unit) IsSidecar() (bool, error) {
	app, err := u.Application()
	if err != nil {
		return false, errors.Trace(err)
	}
	return app.IsSidecar()
}

// Application returns the application.
func (u *Unit) Application() (*Application, error) {
	return u.st.Application(u.doc.Application)
}

// ConfigSettings returns the complete set of application charm config settings
// available to the unit. Unset values will be replaced with the default
// value for the associated option, and may thus be nil when no default is
// specified.
func (u *Unit) ConfigSettings() (charm.Settings, error) {
	if u.doc.CharmURL == nil {
		return nil, fmt.Errorf("unit's charm URL must be set before retrieving config")
	}

	// TODO (manadart 2019-02-21) Factor the current generation into this call.
	s, err := charmSettingsWithDefaults(u.st, u.doc.CharmURL, u.doc.Application, model.GenerationMaster)
	if err != nil {
		return nil, errors.Annotatef(err, "charm config for unit %q", u.Name())
	}
	return s, nil
}

// ApplicationName returns the application name.
func (u *Unit) ApplicationName() string {
	return u.doc.Application
}

// Base returns the deployed charm's base.
func (u *Unit) Base() Base {
	return u.doc.Base
}

// String returns the unit as a string.
func (u *Unit) String() string {
	return u.doc.Name
}

// Name returns the unit name.
func (u *Unit) Name() string {
	return u.doc.Name
}

// unitGlobalKey returns the global database key for the named unit.
func unitGlobalKey(name string) string {
	return "u#" + name + "#charm"
}

// globalWorkloadVersionKey returns the global database key for the
// workload version status key for this unit.
func globalWorkloadVersionKey(name string) string {
	return unitGlobalKey(name) + "#sat#workload-version"
}

// globalAgentKey returns the global database key for the unit.
func (u *Unit) globalAgentKey() string {
	return unitAgentGlobalKey(u.doc.Name)
}

// globalMeterStatusKey returns the global database key for the meter status of the unit.
func (u *Unit) globalMeterStatusKey() string {
	return unitAgentGlobalKey(u.doc.Name)
}

// globalKey returns the global database key for the unit.
func (u *Unit) globalKey() string {
	return unitGlobalKey(u.doc.Name)
}

// globalWorkloadVersionKey returns the global database key for the unit's
// workload version info.
func (u *Unit) globalWorkloadVersionKey() string {
	return globalWorkloadVersionKey(u.doc.Name)
}
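// exampleReadUnitConfig is an illustrative sketch, not part of the original
// file: it shows how a caller holding a *Unit might read the effective charm
// configuration via ConfigSettings above. charm.Settings is a
// map[string]interface{}, so unset options with no default come back as nil.
// The option name "log-level" is a hypothetical charm option.
func exampleReadUnitConfig(u *Unit) error {
	settings, err := u.ConfigSettings()
	if err != nil {
		return errors.Trace(err)
	}
	if v, ok := settings["log-level"]; ok && v != nil {
		unitLogger.Debugf("unit %q has log-level %v", u.Name(), v)
	}
	return nil
}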
// globalCloudContainerKey returns the global database key for the unit's
// Cloud Container info.
func (u *Unit) globalCloudContainerKey() string {
	return globalCloudContainerKey(u.doc.Name)
}

// Life returns whether the unit is Alive, Dying or Dead.
func (u *Unit) Life() Life {
	return u.doc.Life
}

// WorkloadVersion returns the version of the running workload set by
// the charm (eg, the version of postgresql that is running, as
// opposed to the version of the postgresql charm).
func (u *Unit) WorkloadVersion() (string, error) {
	unitStatus, err := getStatus(u.st.db(), u.globalWorkloadVersionKey(), "workload")
	if errors.IsNotFound(err) {
		return "", nil
	} else if err != nil {
		return "", errors.Trace(err)
	}
	return unitStatus.Message, nil
}

// SetWorkloadVersion sets the version of the workload that the unit
// is currently running.
func (u *Unit) SetWorkloadVersion(version string) error {
	// Store in status rather than an attribute of the unit doc - we
	// want to avoid everything being an attr of the main docs to
	// stop a swarm of watchers being notified for irrelevant changes.
	now := u.st.clock().Now()
	return setStatus(u.st.db(), setStatusParams{
		badge:     "workload",
		globalKey: u.globalWorkloadVersionKey(),
		status:    status.Active,
		message:   version,
		updated:   &now,
	})
}

// WorkloadVersionHistory returns a HistoryGetter which enables the
// caller to request past workload version changes.
func (u *Unit) WorkloadVersionHistory() *HistoryGetter {
	return &HistoryGetter{st: u.st, globalKey: u.globalWorkloadVersionKey()}
}

// AgentTools returns the tools that the agent is currently running.
// It returns an error that satisfies errors.IsNotFound if the tools
// have not yet been set.
func (u *Unit) AgentTools() (*tools.Tools, error) {
	if u.doc.Tools == nil {
		return nil, errors.NotFoundf("agent binaries for unit %q", u)
	}
	result := *u.doc.Tools
	return &result, nil
}

// SetAgentVersion sets the version of juju that the agent is
// currently running.
func (u *Unit) SetAgentVersion(v version.Binary) (err error) {
	defer errors.DeferredAnnotatef(&err, "cannot set agent version for unit %q", u)
	if err = checkVersionValidity(v); err != nil {
		return err
	}
	versionedTool := &tools.Tools{Version: v}
	ops := []txn.Op{{
		C:      unitsC,
		Id:     u.doc.DocID,
		Assert: notDeadDoc,
		Update: bson.D{{"$set", bson.D{{"tools", versionedTool}}}},
	}}
	if err := u.st.db().RunTransaction(ops); err != nil {
		return onAbort(err, stateerrors.ErrDead)
	}
	u.doc.Tools = versionedTool
	return nil
}

// SetPassword sets the password for the machine's agent.
func (u *Unit) SetPassword(password string) error {
	if len(password) < utils.MinAgentPasswordLength {
		return fmt.Errorf("password is only %d bytes long, and is not a valid Agent password", len(password))
	}
	return u.setPasswordHash(utils.AgentPasswordHash(password))
}
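// exampleWorkloadVersion is an illustrative sketch, not part of the original
// file: a round trip through the workload-version API above. The version
// string "14.1" is a stand-in for whatever the charm reports.
func exampleWorkloadVersion(u *Unit) error {
	if err := u.SetWorkloadVersion("14.1"); err != nil {
		return errors.Trace(err)
	}
	// WorkloadVersion reads the value back from the status collection;
	// it returns "" (and no error) if the version was never set.
	v, err := u.WorkloadVersion()
	if err != nil {
		return errors.Trace(err)
	}
	unitLogger.Debugf("unit %q runs workload version %q", u.Name(), v)
	return nil
}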
// setPasswordHash sets the underlying password hash in the database directly
// to the value supplied. This is split out from SetPassword to allow direct
// manipulation in tests (to check for backwards compatibility).
func (u *Unit) setPasswordHash(passwordHash string) error {
	ops := u.setPasswordHashOps(passwordHash)
	err := u.st.db().RunTransaction(ops)
	if err != nil {
		return fmt.Errorf("cannot set password of unit %q: %v", u, onAbort(err, stateerrors.ErrDead))
	}
	u.doc.PasswordHash = passwordHash
	return nil
}

func (u *Unit) setPasswordHashOps(passwordHash string) []txn.Op {
	return []txn.Op{{
		C:      unitsC,
		Id:     u.doc.DocID,
		Assert: notDeadDoc,
		Update: bson.D{{"$set", bson.D{{"passwordhash", passwordHash}}}},
	}}
}

// PasswordValid returns whether the given password is valid
// for the given unit.
func (u *Unit) PasswordValid(password string) bool {
	agentHash := utils.AgentPasswordHash(password)
	if agentHash == u.doc.PasswordHash {
		return true
	}
	// Increased error logging for LP: 1956975 agent lost due to ErrBadCreds.
	// Usually found 1-3 months after it happened. It would be helpful to have
	// additional data we can go back and find.
	if agentHash == "" {
		logger.Errorf("%q invalid password, provided agent hash empty", u.Name())
		return false
	}
	if u.doc.PasswordHash == "" {
		logger.Errorf("%q invalid password, doc password hash empty", u.Name())
		return false
	}
	app, err := u.Application()
	if err != nil {
		logger.Errorf("%q invalid password, error getting application: %s", u.Name(), err.Error())
		return false
	}
	units, err := app.AllUnits()
	if err != nil {
		logger.Errorf("%q invalid password, error getting all units: %s", app.Name(), err.Error())
		return false
	}
	for _, unit := range units {
		if u.Name() != unit.Name() && agentHash == unit.doc.PasswordHash {
			logger.Errorf("%q invalid password, provided agent hash matches %q password hash", u.Name(), unit.Name())
		}
	}
	return false
}

// UpdateOperation returns a model operation that will update a unit.
func (u *Unit) UpdateOperation(props UnitUpdateProperties) *UpdateUnitOperation {
	return &UpdateUnitOperation{
		unit:  &Unit{st: u.st, doc: u.doc, modelType: u.modelType},
		props: props,
	}
}

// UpdateUnitOperation is a model operation for updating a unit.
type UpdateUnitOperation struct {
	unit  *Unit
	props UnitUpdateProperties

	setStatusDocs map[string]statusDoc
}
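// examplePasswordRoundTrip is an illustrative sketch, not part of the
// original file: setting and then validating an agent password via the
// methods above. The literal password is purely for demonstration; real
// agent passwords are randomly generated and at least
// utils.MinAgentPasswordLength bytes long.
func examplePasswordRoundTrip(u *Unit) error {
	const password = "arbitrary-secret-of-sufficient-length"
	if err := u.SetPassword(password); err != nil {
		return errors.Trace(err)
	}
	if !u.PasswordValid(password) {
		return errors.Errorf("password for unit %q did not validate", u.Name())
	}
	return nil
}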
// Build is part of the ModelOperation interface.
func (op *UpdateUnitOperation) Build(_ int) ([]txn.Op, error) {
	op.setStatusDocs = make(map[string]statusDoc)

	containerInfo, err := op.unit.cloudContainer()
	if err != nil && !errors.IsNotFound(err) {
		return nil, errors.Trace(err)
	}
	if containerInfo == nil {
		containerInfo = &cloudContainerDoc{
			Id: op.unit.globalKey(),
		}
	}
	existingContainerInfo := *containerInfo

	var newProviderId string
	if op.props.ProviderId != nil {
		newProviderId = *op.props.ProviderId
	}
	if containerInfo.ProviderId != "" &&
		newProviderId != "" &&
		containerInfo.ProviderId != newProviderId {
		logger.Debugf("unit %q has provider id %q which changed to %q",
			op.unit.Name(), containerInfo.ProviderId, newProviderId)
	}

	if op.props.ProviderId != nil {
		containerInfo.ProviderId = newProviderId
	}
	if op.props.Address != nil {
		networkAddr := network.NewSpaceAddress(*op.props.Address, network.WithScope(network.ScopeMachineLocal))
		addr := fromNetworkAddress(networkAddr, network.OriginProvider)
		containerInfo.Address = &addr
	}
	if op.props.Ports != nil {
		containerInfo.Ports = *op.props.Ports
	}
	// Currently, we only update container attributes but that might change.
	var ops []txn.Op
	if !reflect.DeepEqual(*containerInfo, existingContainerInfo) {
		containerOps, err := op.unit.saveContainerOps(*containerInfo)
		if err != nil {
			return nil, errors.Trace(err)
		}
		ops = append(ops, containerOps...)
	}

	updateStatus := func(key, badge string, status *status.StatusInfo) error {
		now := op.unit.st.clock().Now()
		doc := statusDoc{
			Status:     status.Status,
			StatusInfo: status.Message,
			StatusData: mgoutils.EscapeKeys(status.Data),
			Updated:    now.UnixNano(),
		}
		op.setStatusDocs[key] = doc
		// It's possible we're getting a first status update (i.e. cloud container).
		_, err = getStatus(op.unit.st.db(), key, badge)
		if err != nil {
			if !errors.IsNotFound(err) {
				return errors.Trace(err)
			}
			statusOps := createStatusOp(op.unit.st, key, doc)
			ops = append(ops, statusOps)
		} else {
			statusOps, err := statusSetOps(op.unit.st.db(), doc, key)
			if err != nil {
				return errors.Trace(err)
			}
			ops = append(ops, statusOps...)
		}
		return nil
	}
	if op.props.AgentStatus != nil {
		if err := updateStatus(op.unit.globalAgentKey(), "agent", op.props.AgentStatus); err != nil {
			return nil, errors.Trace(err)
		}
	}

	var cloudContainerStatus status.StatusInfo
	if op.props.CloudContainerStatus != nil {
		if err := updateStatus(op.unit.globalCloudContainerKey(), "cloud container", op.props.CloudContainerStatus); err != nil {
			return nil, errors.Trace(err)
		}
		cloudContainerStatus = *op.props.CloudContainerStatus
	}
	if cloudContainerStatus.Status != "" {
		// Since we have updated the cloud container, that may impact the
		// perceived unit status. We'll update status history if the unit
		// status is different due to having a cloud container status. This
		// correctly ensures the status history goes from "waiting for
		// container" to <something else>.
		unitStatus, err := getStatus(op.unit.st.db(), op.unit.globalKey(), "unit")
		if err != nil {
			return nil, errors.Trace(err)
		}

		modifiedStatus := status.UnitDisplayStatus(unitStatus, cloudContainerStatus, true)
		now := op.unit.st.clock().Now()
		doc := statusDoc{
			Status:     modifiedStatus.Status,
			StatusInfo: modifiedStatus.Message,
			StatusData: mgoutils.EscapeKeys(modifiedStatus.Data),
			Updated:    now.UnixNano(),
		}
		op.setStatusDocs[op.unit.globalKey()] = doc
	}
	return ops, nil
}

// Done is part of the ModelOperation interface.
func (op *UpdateUnitOperation) Done(err error) error {
	if err != nil {
		return errors.Annotatef(err, "updating unit %q", op.unit.Name())
	}
	// We can't include the necessary status history updates in the ops
	// slice, so as with existing practice, do a best-effort update of
	// status history.
	for key, doc := range op.setStatusDocs {
		_, _ = probablyUpdateStatusHistory(op.unit.st.db(), key, doc)
	}
	return nil
}

// Destroy, when called on an Alive unit, advances its lifecycle as far as
// possible; it otherwise has no effect. In most situations, the unit's
// life is just set to Dying; but if a principal unit that is not assigned
// to a provisioned machine is Destroyed, it will be removed from state
// directly.
func (u *Unit) Destroy() error {
	errs, err := u.DestroyWithForce(false, time.Duration(0))
	if len(errs) != 0 {
		logger.Warningf("operational errors destroying unit %v: %v", u.Name(), errs)
	}
	return err
}

// DestroyWithForce does the same thing as Destroy(), but if force is true
// it will proceed past operational errors rather than failing on them; any
// such non-fatal errors encountered are returned to the caller.
func (u *Unit) DestroyWithForce(force bool, maxWait time.Duration) (errs []error, err error) {
	defer func() {
		if err == nil {
			// This is a white lie; the document might actually be removed.
			u.doc.Life = Dying
		}
	}()
	op := u.DestroyOperation()
	op.Force = force
	op.MaxWait = maxWait
	err = u.st.ApplyOperation(op)
	return op.Errors, err
}

// DestroyOperation returns a model operation that will destroy the unit.
func (u *Unit) DestroyOperation() *DestroyUnitOperation {
	return &DestroyUnitOperation{
		unit: &Unit{st: u.st, doc: u.doc, modelType: u.modelType},
	}
}

// DestroyUnitOperation is a model operation for destroying a unit.
type DestroyUnitOperation struct {
	// ForcedOperation stores needed information to force this operation.
	ForcedOperation

	// unit holds the unit to destroy.
	unit *Unit

	// DestroyStorage controls whether or not storage attached
	// to the unit is destroyed. If this is false, then detachable
	// storage will be detached and left in the model.
	DestroyStorage bool
}
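// exampleForceDestroy is an illustrative sketch, not part of the original
// file: force-destroying a unit while surfacing the non-fatal operational
// errors that force mode accumulates instead of failing on.
func exampleForceDestroy(u *Unit) error {
	opErrs, err := u.DestroyWithForce(true, time.Minute)
	for _, opErr := range opErrs {
		logger.Warningf("non-fatal error destroying %q: %v", u.Name(), opErr)
	}
	return errors.Trace(err)
}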
// Build is part of the ModelOperation interface.
func (op *DestroyUnitOperation) Build(attempt int) ([]txn.Op, error) {
	if attempt > 0 {
		if err := op.unit.Refresh(); errors.IsNotFound(err) {
			return nil, jujutxn.ErrNoOperations
		} else if err != nil {
			return nil, err
		}
	}
	// When 'force' is set on the operation, this call will return both the
	// needed operations as well as all operational errors encountered.
	// If 'force' is not set, any error will be fatal and no operations will
	// be returned.
	switch ops, err := op.destroyOps(); err {
	case errRefresh:
	case errAlreadyDying:
		return nil, jujutxn.ErrNoOperations
	case nil:
		return ops, nil
	default:
		if op.Force {
			logger.Warningf("forcing unit destruction for %v despite error %v", op.unit.Name(), err)
			return ops, nil
		}
		return nil, err
	}
	return nil, jujutxn.ErrNoOperations
}

// Done is part of the ModelOperation interface.
func (op *DestroyUnitOperation) Done(err error) error {
	if err != nil {
		if !op.Force {
			return errors.Annotatef(err, "cannot destroy unit %q", op.unit)
		}
		op.AddError(errors.Errorf("force destroy unit %q proceeded despite encountering ERROR %v", op.unit, err))
	}
	if err := op.eraseHistory(); err != nil {
		if !op.Force {
			logger.Errorf("cannot delete history for unit %q: %v", op.unit.globalKey(), err)
		}
		op.AddError(errors.Errorf("force erase unit's %q history proceeded despite encountering ERROR %v", op.unit.globalKey(), err))
	}
	if err := op.deleteSecrets(); err != nil {
		logger.Errorf("cannot delete secrets for unit %q: %v", op.unit, err)
	}
	return nil
}

func (op *DestroyUnitOperation) eraseHistory() error {
	var stop <-chan struct{} // stop not used here yet.
	if err := eraseStatusHistory(stop, op.unit.st, op.unit.globalKey()); err != nil {
		one := errors.Annotate(err, "workload")
		if op.FatalError(one) {
			return one
		}
	}
	if err := eraseStatusHistory(stop, op.unit.st, op.unit.globalAgentKey()); err != nil {
		one := errors.Annotate(err, "agent")
		if op.FatalError(one) {
			return one
		}
	}
	if err := eraseStatusHistory(stop, op.unit.st, op.unit.globalWorkloadVersionKey()); err != nil {
		one := errors.Annotate(err, "version")
		if op.FatalError(one) {
			return one
		}
	}
	return nil
}

func (op *DestroyUnitOperation) deleteSecrets() error {
	ownedURIs, err := op.unit.st.referencedSecrets(op.unit.Tag(), "owner-tag")
	if err != nil {
		return errors.Trace(err)
	}
	if _, err := op.unit.st.deleteSecrets(ownedURIs); err != nil {
		return errors.Annotatef(err, "deleting owned secrets for %q", op.unit.Name())
	}
	if err := op.unit.st.RemoveSecretConsumer(op.unit.Tag()); err != nil {
		return errors.Annotatef(err, "deleting secret consumer records for %q", op.unit.Name())
	}
	return nil
}

// destroyOps returns the operations required to destroy the unit. If it
// returns errRefresh, the unit should be refreshed and the destruction
// operations recalculated.
// When 'force' is set on the operation, this call will return both the
// needed operations as well as all operational errors encountered.
// If 'force' is not set, any error will be fatal and no operations will be
// returned.
func (op *DestroyUnitOperation) destroyOps() ([]txn.Op, error) {
	if op.unit.doc.Life != Alive {
		if !op.Force {
			return nil, errAlreadyDying
		}
	}

	// Where possible, we'd like to be able to short-circuit unit destruction
	// such that units can be removed directly rather than waiting for their
	// agents to start, observe Dying, set Dead, and shut down; this takes a
	// long time and is vexing to users. This turns out to be possible if and
	// only if the unit agent has not yet set its status; this implies that the
	// most the unit could possibly have done is to run its install hook.
	//
	// There's no harm in removing a unit that's run its install hook only --
	// or, at least, there is no more harm than there is in removing a unit
	// that's run its stop hook, and that's the usual condition.
	//
	// Principals with subordinates are never eligible for this shortcut,
	// because the unit agent must inevitably have set a status before getting
	// to the point where it can actually create its subordinate.
	//
	// Subordinates should be eligible for the shortcut but are not currently
	// considered, on the basis that (1) they were created by active principals
	// and can be expected to be deployed pretty soon afterwards, so we don't
	// lose much time and (2) by maintaining this restriction, I can reduce
	// the number of tests that have to change and defer that improvement to
	// its own CL.

	// If the minUnits document exists, we need to increment the revno so that
	// it is obvious the min units count is changing.
	minUnitsOp := minUnitsTriggerOp(op.unit.st, op.unit.ApplicationName())
	minUnitsExists, err := doesMinUnitsExist(op.unit.st, op.unit.ApplicationName())
	if err != nil {
		return nil, errors.Trace(err)
	}
	cleanupOp := newCleanupOp(cleanupDyingUnit, op.unit.doc.Name, op.DestroyStorage, op.Force, op.MaxWait)

	// If we're forcing destruction the assertion shouldn't be that
	// life is alive, but that it's what we think it is now.
	assertion := isAliveDoc
	if op.Force {
		assertion = bson.D{{"life", op.unit.doc.Life}}
	}

	setDyingOp := txn.Op{
		C:      unitsC,
		Id:     op.unit.doc.DocID,
		Assert: assertion,
		Update: bson.D{{"$set", bson.D{{"life", Dying}}}},
	}
	setDyingOps := func(dyingErr error) ([]txn.Op, error) {
		if !op.Force && dyingErr != nil {
			// If we are not forcing removal, we care about the errors as they will stop removal.
			// Don't return operations.
			return nil, dyingErr
		}
		// If we are forcing, we care about the errors as we want to report
		// them to the user. But we also want the operations to power through
		// the removal.
		if dyingErr != nil {
			op.AddError(errors.Errorf("force destroying dying unit %v despite error %v", op.unit.Name(), dyingErr))
		}
		ops := []txn.Op{setDyingOp, cleanupOp}
		if minUnitsExists {
			ops = append(ops, minUnitsOp)
		}
		return ops, nil
	}
	if op.unit.doc.Principal != "" {
		return setDyingOps(nil)
	} else if len(op.unit.doc.Subordinates)+op.unit.doc.StorageAttachmentCount != 0 {
		return setDyingOps(nil)
	}

	// See if the unit agent has started running.
	// If so then we can't set directly to dead.
	isAssigned := op.unit.doc.MachineId != ""
	shouldBeAssigned := op.unit.ShouldBeAssigned()
	agentStatusDocId := op.unit.globalAgentKey()
	agentStatusInfo, agentErr := getStatus(op.unit.st.db(), agentStatusDocId, "agent")
	if errors.IsNotFound(agentErr) {
		return nil, errAlreadyDying
	} else if agentErr != nil {
		if !op.Force {
			return nil, errors.Trace(agentErr)
		}
	}

	// This has to be a function since we want to delay evaluation of the
	// value, in case the agent erred out.
	isReady := func() (bool, error) {
		// IAAS models need the unit to be assigned.
		if shouldBeAssigned {
			return isAssigned && agentStatusInfo.Status != status.Allocating, nil
		}
		// For CAAS models, check to see if the unit agent has started (the
		// presence of the unitstates row indicates this).
		unitState, err := op.unit.State()
		if err != nil {
			return false, errors.Trace(err)
		}
		return unitState.Modified(), nil
	}
	if agentErr == nil {
		ready, err := isReady()
		if op.FatalError(err) {
			return nil, errors.Trace(err)
		}
		if ready {
			return setDyingOps(agentErr)
		}
	}
	switch agentStatusInfo.Status {
	case status.Error, status.Allocating:
	default:
		err := errors.Errorf("unexpected unit state - unit with status %v is not deployed", agentStatusInfo.Status)
		if op.FatalError(err) {
			return nil, err
		}
	}

	statusOp := txn.Op{
		C:      statusesC,
		Id:     op.unit.st.docID(agentStatusDocId),
		Assert: bson.D{{"status", agentStatusInfo.Status}},
	}
	removeAsserts := isAliveDoc
	if op.Force {
		removeAsserts = bson.D{{"life", op.unit.doc.Life}}
	}
	removeAsserts = append(removeAsserts, bson.DocElem{
		"$and", []bson.D{
			unitHasNoSubordinates,
			unitHasNoStorageAttachments,
		},
	})
	// If the unit is unassigned, ensure it is not assigned in the interim.
	if !isAssigned && shouldBeAssigned {
		removeAsserts = append(removeAsserts, bson.DocElem{"machineid", ""})
	}

	// When 'force' is set, this call will return some, if not all, of the
	// needed operations. All operational errors encountered will be added
	// to the operation. If 'force' is not set, any error will be fatal and
	// no operations will be returned.
	removeOps, err := op.unit.removeOps(removeAsserts, &op.ForcedOperation, op.DestroyStorage)
	if err == errAlreadyRemoved {
		return nil, errAlreadyDying
	} else if op.FatalError(err) {
		return nil, err
	}
	ops := []txn.Op{statusOp}
	if minUnitsExists {
		ops = append(ops, minUnitsOp)
	}
	ops = append(ops, removeOps...)
	return ops, nil
}
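// exampleDestroyWithStorage is an illustrative sketch, not part of the
// original file: using DestroyOperation directly when the caller needs
// control over the storage flag before applying the operation.
func exampleDestroyWithStorage(u *Unit) error {
	op := u.DestroyOperation()
	op.DestroyStorage = true // destroy, rather than detach, the unit's storage
	return errors.Trace(u.st.ApplyOperation(op))
}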
// destroyHostOps returns all necessary operations to destroy the application
// unit's host machine, or ensure that the conditions preventing its
// destruction remain stable through the transaction.
// When 'force' is set, this call will return the needed operations and
// accumulate all operational errors encountered on the operation.
// If 'force' is not set, any error will be fatal and no operations will be
// returned.
func (u *Unit) destroyHostOps(a *Application, op *ForcedOperation) (ops []txn.Op, err error) {
	if a.doc.Subordinate {
		return []txn.Op{{
			C:      unitsC,
			Id:     u.st.docID(u.doc.Principal),
			Assert: txn.DocExists,
			Update: bson.D{{"$pull", bson.D{{"subordinates", u.doc.Name}}}},
		}}, nil
	} else if u.doc.MachineId == "" {
		unitLogger.Tracef("unit %v unassigned", u)
		return nil, nil
	}

	m, err := u.st.Machine(u.doc.MachineId)
	if err != nil {
		if errors.IsNotFound(err) {
			return nil, nil
		}
		return nil, err
	}
	node, err := u.st.ControllerNode(u.doc.MachineId)
	if err != nil && !errors.IsNotFound(err) {
		return nil, err
	}
	haveControllerNode := err == nil
	hasVote := haveControllerNode && node.HasVote()

	containerCheck := true // whether container conditions allow destroying the host machine
	containers, err := m.Containers()
	if op.FatalError(err) {
		return nil, err
	}
	if len(containers) > 0 {
		ops = append(ops, txn.Op{
			C:      containerRefsC,
			Id:     m.doc.DocID,
			Assert: bson.D{{"children.0", bson.D{{"$exists", 1}}}},
		})
		containerCheck = false
	} else {
		ops = append(ops, txn.Op{
			C:  containerRefsC,
			Id: m.doc.DocID,
			Assert: bson.D{{"$or", []bson.D{
				{{"children", bson.D{{"$size", 0}}}},
				{{"children", bson.D{{"$exists", false}}}},
			}}},
		})
	}

	isController := m.IsManager()
	machineCheck := true // whether host machine conditions allow destroy
	if len(m.doc.Principals) != 1 || m.doc.Principals[0] != u.doc.Name {
		machineCheck = false
	} else if isController {
		// Check that the machine does not have any responsibilities that
		// prevent a lifecycle change.
		machineCheck = false
	} else if hasVote {
		machineCheck = false
	}

	// Assert that the machine conditions pertaining to host removal
	// remain the same throughout the transaction.
	var machineAssert bson.D
	var controllerNodeAssert interface{}
	if machineCheck {
		machineAssert = bson.D{{"$and", []bson.D{
			{{"principals", []string{u.doc.Name}}},
			{{"jobs", bson.D{{"$nin", []MachineJob{JobManageModel}}}}},
		}}}
		controllerNodeAssert = txn.DocMissing
		if haveControllerNode {
			controllerNodeAssert = bson.D{{"has-vote", false}}
		}
	} else {
		machineAssert = bson.D{{"$or", []bson.D{
			{{"principals", bson.D{{"$ne", []string{u.doc.Name}}}}},
			{{"jobs", bson.D{{"$in", []MachineJob{JobManageModel}}}}},
		}}}
		if isController {
			controllerNodeAssert = txn.DocExists
		}
	}

	// If the removal conditions are satisfied by the machine & container
	// docs, we can destroy the machine in addition to removing the unit
	// principal.
	machineUpdate := bson.D{{"$pull", bson.D{{"principals", u.doc.Name}}}}
	var cleanupOps []txn.Op
	if machineCheck && containerCheck {
		machineUpdate = append(machineUpdate, bson.D{{"$set", bson.D{{"life", Dying}}}}...)
		if !op.Force {
			cleanupOps = []txn.Op{newCleanupOp(cleanupDyingMachine, m.doc.Id, op.Force)}
		} else {
			cleanupOps = []txn.Op{newCleanupOp(cleanupForceDestroyedMachine, m.doc.Id, op.MaxWait)}
		}
	}

	ops = append(ops, txn.Op{
		C:      machinesC,
		Id:     m.doc.DocID,
		Assert: machineAssert,
		Update: machineUpdate,
	})
	if controllerNodeAssert != nil {
		ops = append(ops, txn.Op{
			C:      controllerNodesC,
			Id:     m.st.docID(m.Id()),
			Assert: controllerNodeAssert,
		})
	}

	return append(ops, cleanupOps...), nil
}

// removeOps returns the operations necessary to remove the unit, assuming
// the supplied asserts apply to the unit document.
// When 'force' is set, this call will return the needed operations,
// accumulating all operational errors in the operation.
// If 'force' is not set, any error will be fatal and no operations will be
// returned.
func (u *Unit) removeOps(asserts bson.D, op *ForcedOperation, destroyStorage bool) ([]txn.Op, error) {
	app, err := u.st.Application(u.doc.Application)
	if errors.IsNotFound(err) {
		// If the application has been removed, the unit must already have been.
		return nil, errAlreadyRemoved
	} else if err != nil {
		// If we cannot find the application, no amount of force will succeed after this point.
		return nil, err
	}
	return app.removeUnitOps(u, asserts, op, destroyStorage)
}

var unitHasNoSubordinates = bson.D{{
	"$or", []bson.D{
		{{"subordinates", bson.D{{"$size", 0}}}},
		{{"subordinates", bson.D{{"$exists", false}}}},
	},
}}

var unitHasNoStorageAttachments = bson.D{{
	"$or", []bson.D{
		{{"storageattachmentcount", 0}},
		{{"storageattachmentcount", bson.D{{"$exists", false}}}},
	},
}}

// EnsureDead sets the unit lifecycle to Dead if it is Alive or Dying.
// It does nothing otherwise. If the unit has subordinates, it will
// return ErrUnitHasSubordinates; otherwise, if it has storage attachments,
// it will return ErrUnitHasStorageAttachments.
func (u *Unit) EnsureDead() (err error) {
	if u.doc.Life == Dead {
		return nil
	}
	defer func() {
		if err == nil {
			u.doc.Life = Dead
		}
	}()
	assert := append(notDeadDoc, bson.DocElem{
		"$and", []bson.D{
			unitHasNoSubordinates,
			unitHasNoStorageAttachments,
		},
	})
	ops := []txn.Op{{
		C:      unitsC,
		Id:     u.doc.DocID,
		Assert: assert,
		Update: bson.D{{"$set", bson.D{{"life", Dead}}}},
	}}
	if err := u.st.db().RunTransaction(ops); err != txn.ErrAborted {
		return err
	}
	if notDead, err := isNotDead(u.st, unitsC, u.doc.DocID); err != nil {
		return err
	} else if !notDead {
		return nil
	}
	if err := u.Refresh(); errors.IsNotFound(err) {
		return nil
	} else if err != nil {
		return err
	}
	if len(u.doc.Subordinates) > 0 {
		return stateerrors.ErrUnitHasSubordinates
	}
	return stateerrors.ErrUnitHasStorageAttachments
}

// RemoveOperation returns a model operation that will remove the unit.
func (u *Unit) RemoveOperation(force bool) *RemoveUnitOperation {
	return &RemoveUnitOperation{
		unit:            &Unit{st: u.st, doc: u.doc, modelType: u.modelType},
		ForcedOperation: ForcedOperation{Force: force},
	}
}
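// exampleEnsureDeadThenRemove is an illustrative sketch, not part of the
// original file: the usual lifecycle tail for a unit - ensure it is Dead,
// then remove it, handling the sentinel errors documented on EnsureDead.
func exampleEnsureDeadThenRemove(u *Unit) error {
	switch err := u.EnsureDead(); err {
	case nil:
	case stateerrors.ErrUnitHasSubordinates, stateerrors.ErrUnitHasStorageAttachments:
		// Subordinates or storage attachments must be gone first.
		return errors.Trace(err)
	default:
		return errors.Trace(err)
	}
	return errors.Trace(u.Remove())
}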
// RemoveUnitOperation is a model operation for removing a unit.
type RemoveUnitOperation struct {
	// ForcedOperation stores needed information to force this operation.
	ForcedOperation

	// unit holds the unit to remove.
	unit *Unit
}

// Build is part of the ModelOperation interface.
func (op *RemoveUnitOperation) Build(attempt int) ([]txn.Op, error) {
	if attempt > 0 {
		if err := op.unit.Refresh(); errors.IsNotFound(err) {
			return nil, jujutxn.ErrNoOperations
		} else if err != nil {
			return nil, err
		}
	}
	// When 'force' is set on the operation, this call will return both the
	// needed operations as well as all operational errors encountered.
	// If 'force' is not set, any error will be fatal and no operations will
	// be returned.
	switch ops, err := op.removeOps(); err {
	case errRefresh:
	case errAlreadyDying:
		return nil, jujutxn.ErrNoOperations
	case nil:
		return ops, nil
	default:
		if op.Force {
			logger.Warningf("forcing unit removal for %v despite error %v", op.unit.Name(), err)
			return ops, nil
		}
		return nil, err
	}
	return nil, jujutxn.ErrNoOperations
}

// Done is part of the ModelOperation interface.
func (op *RemoveUnitOperation) Done(err error) error {
	if err != nil {
		if !op.Force {
			return errors.Annotatef(err, "cannot remove unit %q", op.unit)
		}
		op.AddError(errors.Errorf("force removing unit %q proceeded despite encountering ERROR %v", op.unit, err))
	}
	return nil
}

// Remove removes the unit from state, and may remove its application as well, if
// the application is Dying and no other references to it exist. It will fail if
// the unit is not Dead.
func (u *Unit) Remove() error {
	_, err := u.RemoveWithForce(false, time.Duration(0))
	return err
}

// RemoveWithForce removes the unit from state similarly to unit.Remove(), but
// if force is true it ignores errors that would otherwise stop the removal.
// In addition, this function also returns all non-fatal operational errors
// encountered.
func (u *Unit) RemoveWithForce(force bool, maxWait time.Duration) ([]error, error) {
	op := u.RemoveOperation(force)
	op.MaxWait = maxWait
	err := u.st.ApplyOperation(op)
	return op.Errors, err
}

// When 'force' is set, this call will return the needed operations and all
// operational errors will be accumulated in the operation itself.
// If 'force' is not set, any error will be fatal and no operations will be
// returned.
func (op *RemoveUnitOperation) removeOps() (ops []txn.Op, err error) {
	if op.unit.doc.Life != Dead {
		return nil, errors.New("unit is not dead")
	}
	// Now the unit is Dead, we can be sure that it's impossible for it to
	// enter relation scopes (once it's Dying, we can be sure of this; but
	// EnsureDead does not require that it already be Dying, so this is the
	// only point at which we can safely backstop lp:1233457 and mitigate
	// the impact of unit agent bugs that leave relation scopes occupied).
	relations, err := matchingRelations(op.unit.st, op.unit.doc.Application)
	if op.FatalError(err) {
		return nil, err
	} else {
		failRelations := false
		for _, rel := range relations {
			ru, err := rel.Unit(op.unit)
			if err != nil {
				op.AddError(err)
				failRelations = true
				continue
			}
			leaveScopeOps, err := ru.leaveScopeForcedOps(&op.ForcedOperation)
			if err != nil && err != jujutxn.ErrNoOperations {
				op.AddError(err)
				failRelations = true
			}
			ops = append(ops, leaveScopeOps...)
		}
		if !op.Force && failRelations {
			return nil, op.LastError()
		}
	}

	// Now we're sure we haven't left any scopes occupied by this unit, we
	// can safely remove the document.
	unitRemoveOps, err := op.unit.removeOps(isDeadDoc, &op.ForcedOperation, false)
	if op.FatalError(err) {
		return nil, err
	}
	return append(ops, unitRemoveOps...), nil
}

// Resolved returns the resolved mode for the unit.
func (u *Unit) Resolved() ResolvedMode {
	return u.doc.Resolved
}

// IsPrincipal returns whether the unit is deployed in its own container,
// and can therefore have subordinate applications deployed alongside it.
func (u *Unit) IsPrincipal() bool {
	return u.doc.Principal == ""
}

// SubordinateNames returns the names of any subordinate units.
func (u *Unit) SubordinateNames() []string {
	subNames := make([]string, len(u.doc.Subordinates))
	copy(subNames, u.doc.Subordinates)
	return subNames
}

// RelationsJoined returns the relations for which the unit has entered scope
// and neither left it nor prepared to leave it.
func (u *Unit) RelationsJoined() ([]*Relation, error) {
	return u.relations(func(ru *RelationUnit) (bool, error) {
		return ru.Joined()
	})
}

// RelationsInScope returns the relations for which the unit has entered scope
// and not left it.
func (u *Unit) RelationsInScope() ([]*Relation, error) {
	return u.relations(func(ru *RelationUnit) (bool, error) {
		return ru.InScope()
	})
}

type relationPredicate func(ru *RelationUnit) (bool, error)

// relations implements RelationsJoined and RelationsInScope.
func (u *Unit) relations(predicate relationPredicate) ([]*Relation, error) {
	candidates, err := matchingRelations(u.st, u.doc.Application)
	if err != nil {
		return nil, err
	}
	var filtered []*Relation
	for _, relation := range candidates {
		relationUnit, err := relation.Unit(u)
		if err != nil {
			return nil, err
		}
		if include, err := predicate(relationUnit); err != nil {
			return nil, err
		} else if include {
			filtered = append(filtered, relation)
		}
	}
	return filtered, nil
}

// PrincipalName returns the name of the unit's principal.
// If the unit is not a subordinate, false is returned.
func (u *Unit) PrincipalName() (string, bool) {
	return u.doc.Principal, u.doc.Principal != ""
}

// machine returns the unit's machine.
//
// machine is part of the machineAssignable interface.
func (u *Unit) machine() (*Machine, error) {
	id, err := u.AssignedMachineId()
	if err != nil {
		return nil, errors.Annotatef(err, "unit %v cannot get assigned machine", u)
	}
	m, err := u.st.Machine(id)
	if err != nil {
		return nil, errors.Annotatef(err, "unit %v misses machine id %v", u, id)
	}
	return m, nil
}

// noAssignedMachineOp is part of the machineAssignable interface.
func (u *Unit) noAssignedMachineOp() txn.Op {
	id := u.doc.DocID
	if u.doc.Principal != "" {
		id = u.doc.Principal
	}
	return txn.Op{
		C:      unitsC,
		Id:     id,
		Assert: bson.D{{"machineid", ""}},
	}
}
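// exampleDescribeUnit is an illustrative sketch, not part of the original
// file: distinguishing a principal from a subordinate unit using the
// accessors above.
func exampleDescribeUnit(u *Unit) {
	if principal, ok := u.PrincipalName(); ok {
		unitLogger.Debugf("unit %q is a subordinate of %q", u.Name(), principal)
		return
	}
	unitLogger.Debugf("unit %q is a principal with subordinates %v", u.Name(), u.SubordinateNames())
}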
// PublicAddress returns the public address of the unit.
func (u *Unit) PublicAddress() (network.SpaceAddress, error) {
	if !u.ShouldBeAssigned() {
		return u.scopedAddress("public")
	}
	m, err := u.machine()
	if err != nil {
		unitLogger.Tracef("%v", err)
		return network.SpaceAddress{}, errors.Trace(err)
	}
	return m.PublicAddress()
}

// PrivateAddress returns the private address of the unit.
func (u *Unit) PrivateAddress() (network.SpaceAddress, error) {
	if !u.ShouldBeAssigned() {
		addr, err := u.scopedAddress("private")
		if network.IsNoAddressError(err) {
			return u.containerAddress()
		}
		return addr, errors.Trace(err)
	}
	m, err := u.machine()
	if err != nil {
		unitLogger.Tracef("%v", err)
		return network.SpaceAddress{}, errors.Trace(err)
	}
	return m.PrivateAddress()
}

// AllAddresses returns the public and private addresses
// plus the container address of the unit (if known).
// Only relevant for CAAS models - will return an empty
// slice for IAAS models.
func (u *Unit) AllAddresses() (addrs network.SpaceAddresses, _ error) {
	if u.ShouldBeAssigned() {
		return addrs, nil
	}

	// First the addresses of the service.
	serviceAddrs, err := u.serviceAddresses()
	if err != nil && !errors.IsNotFound(err) {
		return nil, errors.Trace(err)
	}
	if err == nil {
		addrs = append(addrs, serviceAddrs...)
	}

	// Then the container address.
	containerAddr, err := u.containerAddress()
	if network.IsNoAddressError(err) {
		return addrs, nil
	}
	if err != nil {
		return nil, errors.Trace(err)
	}
	addrs = append(addrs, containerAddr)
	return addrs, nil
}

// serviceAddresses returns the addresses of the service
// managing the pods in which the unit workload is running.
func (u *Unit) serviceAddresses() (network.SpaceAddresses, error) {
	app, err := u.Application()
	if err != nil {
		return nil, errors.Trace(err)
	}
	serviceInfo, err := app.ServiceInfo()
	if err != nil {
		return nil, errors.Trace(err)
	}
	return serviceInfo.Addresses(), nil
}
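// exampleResolvePrivateAddress is an illustrative sketch, not part of the
// original file: looking up a unit's private address while tolerating the
// no-address case that the methods above report via network.NoAddressError.
func exampleResolvePrivateAddress(u *Unit) (string, error) {
	addr, err := u.PrivateAddress()
	if network.IsNoAddressError(err) {
		// No address yet - e.g. the machine or pod is still coming up.
		return "", nil
	}
	if err != nil {
		return "", errors.Trace(err)
	}
	return addr.Value, nil
}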
// containerAddress returns the address of the pod's container.
func (u *Unit) containerAddress() (network.SpaceAddress, error) {
	containerInfo, err := u.cloudContainer()
	if errors.IsNotFound(err) {
		return network.SpaceAddress{}, network.NoAddressError("container")
	}
	if err != nil {
		return network.SpaceAddress{}, errors.Trace(err)
	}
	addr := containerInfo.Address
	if addr == nil {
		return network.SpaceAddress{}, network.NoAddressError("container")
	}
	return addr.networkAddress(), nil
}

func (u *Unit) scopedAddress(scope string) (network.SpaceAddress, error) {
	addresses, err := u.AllAddresses()
	if err != nil {
		return network.SpaceAddress{}, errors.Trace(err)
	}
	if len(addresses) == 0 {
		return network.SpaceAddress{}, network.NoAddressError(scope)
	}
	getStrictPublicAddr := func(addresses network.SpaceAddresses) (network.SpaceAddress, bool) {
		addr, ok := addresses.OneMatchingScope(network.ScopeMatchPublic)
		return addr, ok && addr.Scope == network.ScopePublic
	}

	getInternalAddr := func(addresses network.SpaceAddresses) (network.SpaceAddress, bool) {
		return addresses.OneMatchingScope(network.ScopeMatchCloudLocal)
	}

	var addrMatch func(network.SpaceAddresses) (network.SpaceAddress, bool)
	switch scope {
	case "public":
		addrMatch = getStrictPublicAddr
	case "private":
		addrMatch = getInternalAddr
	default:
		return network.SpaceAddress{}, errors.NotValidf("address scope %q", scope)
	}

	addr, found := addrMatch(addresses)
	if !found {
		return network.SpaceAddress{}, network.NoAddressError(scope)
	}
	return addr, nil
}

// AvailabilityZone returns the name of the availability zone into which
// the unit's machine instance was provisioned.
func (u *Unit) AvailabilityZone() (string, error) {
	m, err := u.machine()
	if err != nil {
		return "", errors.Trace(err)
	}
	return m.AvailabilityZone()
}

// Refresh refreshes the contents of the Unit from the underlying
// state. It returns an error that satisfies errors.IsNotFound if the
// unit has been removed.
func (u *Unit) Refresh() error {
	units, closer := u.st.db().GetCollection(unitsC)
	defer closer()

	err := units.FindId(u.doc.DocID).One(&u.doc)
	if err == mgo.ErrNotFound {
		return errors.NotFoundf("unit %q", u)
	}
	if err != nil {
		return errors.Annotatef(err, "cannot refresh unit %q", u)
	}
	return nil
}

// Agent returns an agent by its unit's name.
func (u *Unit) Agent() *UnitAgent {
	return newUnitAgent(u.st, u.Tag(), u.Name())
}

// AgentHistory returns a StatusHistoryGetter which can
// be used to query the status history of the unit's agent.
func (u *Unit) AgentHistory() status.StatusHistoryGetter {
	return u.Agent()
}
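// exampleLogStatuses is an illustrative sketch, not part of the original
// file: reading the workload and agent statuses side by side via the
// Status and AgentStatus methods defined below.
func exampleLogStatuses(u *Unit) error {
	workload, err := u.Status()
	if err != nil {
		return errors.Trace(err)
	}
	agent, err := u.AgentStatus()
	if err != nil {
		return errors.Trace(err)
	}
	unitLogger.Debugf("unit %q: workload=%s agent=%s", u.Name(), workload.Status, agent.Status)
	return nil
}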
// SetAgentStatus calls SetStatus for this unit's agent; this call
// is equivalent to the former call to SetStatus when Agent and Unit
// were not separate entities.
func (u *Unit) SetAgentStatus(agentStatus status.StatusInfo) error {
	agent := newUnitAgent(u.st, u.Tag(), u.Name())
	s := status.StatusInfo{
		Status:  agentStatus.Status,
		Message: agentStatus.Message,
		Data:    agentStatus.Data,
		Since:   agentStatus.Since,
	}
	return agent.SetStatus(s)
}

// AgentStatus calls Status for this unit's agent; this call
// is equivalent to the former call to Status when Agent and Unit
// were not separate entities.
func (u *Unit) AgentStatus() (status.StatusInfo, error) {
	agent := newUnitAgent(u.st, u.Tag(), u.Name())
	return agent.Status()
}

// StatusHistory returns a slice of at most <size> StatusInfo items
// or items as old as <date> or items newer than now - <delta> time
// representing past statuses for this unit.
func (u *Unit) StatusHistory(filter status.StatusHistoryFilter) ([]status.StatusInfo, error) {
	args := &statusHistoryArgs{
		db:        u.st.db(),
		globalKey: u.globalKey(),
		filter:    filter,
		clock:     u.st.clock(),
	}
	return statusHistory(args)
}

// Status returns the status of the unit.
// This method relies on globalKey instead of globalAgentKey since it is part of
// the effort to separate Unit from UnitAgent. Now the Status for UnitAgent is in
// the UnitAgent struct.
func (u *Unit) Status() (status.StatusInfo, error) {
	// The current health spec says when a hook error occurs, the workload should
	// be in error state, but the state model more correctly records the agent
	// itself as being in error. So we'll do that model translation here.
	// TODO(fwereade) as on unitagent, this transformation does not belong here.
	// For now, pretend we're always reading the unit status.
	info, err := getStatus(u.st.db(), u.globalAgentKey(), "unit")
	if err != nil {
		return status.StatusInfo{}, err
	}
	if info.Status != status.Error {
		info, err = getStatus(u.st.db(), u.globalKey(), "unit")
		if err != nil {
			return status.StatusInfo{}, err
		}
	}
	return info, nil
}

// SetStatus sets the status of the unit agent. The optional values
// allow the caller to pass additional helpful status data.
// This method relies on globalKey instead of globalAgentKey since it is part of
// the effort to separate Unit from UnitAgent. Now the SetStatus for UnitAgent is in
// the UnitAgent struct.
func (u *Unit) SetStatus(unitStatus status.StatusInfo) error {
	if !status.ValidWorkloadStatus(unitStatus.Status) {
		return errors.Errorf("cannot set invalid status %q", unitStatus.Status)
	}

	var newHistory *statusDoc
	if u.modelType == ModelTypeCAAS {
		// CAAS charms currently have no way to query workload status;
		// cloud container status might contradict what the charm is
		// attempting to set, so make sure the right history is set.
		cloudContainerStatus, err := getStatus(u.st.db(), globalCloudContainerKey(u.Name()), "cloud container")
		if err != nil {
			if !errors.IsNotFound(err) {
				return errors.Trace(err)
			}
		}
		model, err := u.st.Model()
		if err != nil {
			return errors.Trace(err)
		}
		expectWorkload, err := CheckApplicationExpectsWorkload(model, u.ApplicationName())
		if err != nil {
			return errors.Trace(err)
		}
		newHistory, err = caasHistoryRewriteDoc(unitStatus, cloudContainerStatus, expectWorkload, status.UnitDisplayStatus, u.st.clock())
		if err != nil {
			return errors.Trace(err)
		}
	}

	return setStatus(u.st.db(), setStatusParams{
		badge:            "unit",
		globalKey:        u.globalKey(),
		status:           unitStatus.Status,
		message:          unitStatus.Message,
		rawData:          unitStatus.Data,
		updated:          timeOrNow(unitStatus.Since, u.st.clock()),
		historyOverwrite: newHistory,
	})
}

// OpenedPortRanges returns a UnitPortRanges object that can be used to query
// and/or mutate the port ranges opened by the unit.
func (u *Unit) OpenedPortRanges() (UnitPortRanges, error) {
	if u.ShouldBeAssigned() {
		return u.openedPortRangesForIAAS()
	}
	isSidecar, err := u.IsSidecar()
	if err != nil {
		return nil, errors.Trace(err)
	}
	if u.isCaas() && !isSidecar {
		return nil, errors.NotSupportedf("open/close ports for %q", u.ApplicationName())
	}
	return u.openedPortRangesForSidecar()
}

func (u *Unit) openedPortRangesForSidecar() (UnitPortRanges, error) {
	return getUnitPortRanges(u.st, u.ApplicationName(), u.Name())
}

// openedPortRangesForIAAS returns a UnitPortRanges object that can be used to query
// and/or mutate the port ranges opened by the unit on the machine it is
// assigned to.
//
// Calls to OpenPortRanges will return an error if the unit is not assigned
// to a machine.
func (u *Unit) openedPortRangesForIAAS() (UnitPortRanges, error) {
	machineID, err := u.AssignedMachineId()
	if err != nil {
		return nil, errors.Annotatef(err, "cannot retrieve ports for unit %q", u.Name())
	}

	machinePorts, err := getOpenedMachinePortRanges(u.st, machineID)
	if err != nil {
		return nil, errors.Annotatef(err, "cannot retrieve ports for unit %q", u.Name())
	}

	return machinePorts.ForUnit(u.Name()), nil
}

// CharmURL returns the charm URL this unit is currently using.
func (u *Unit) CharmURL() *string {
	return u.doc.CharmURL
}
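// exampleOpenedPorts is an illustrative sketch, not part of the original
// file: obtaining the port-ranges object for a unit while handling the CAAS
// non-sidecar case, which OpenedPortRanges reports as NotSupported.
func exampleOpenedPorts(u *Unit) (UnitPortRanges, error) {
	upr, err := u.OpenedPortRanges()
	if errors.IsNotSupported(err) {
		// Pod-spec CAAS charms cannot open or close ports.
		return nil, nil
	}
	return upr, errors.Trace(err)
}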
// SetCharmURL marks the unit as currently using the supplied charm URL.
// An error will be returned if the unit is dead, or the charm URL not known.
func (u *Unit) SetCharmURL(curl string) error {
	if curl == "" {
		return errors.Errorf("cannot set empty charm url")
	}

	db, dbCloser := u.st.newDB()
	defer dbCloser()
	units, uCloser := db.GetCollection(unitsC)
	defer uCloser()
	charms, cCloser := db.GetCollection(charmsC)
	defer cCloser()

	buildTxn := func(attempt int) ([]txn.Op, error) {
		if attempt > 0 {
			// NOTE: We're explicitly allowing SetCharmURL to succeed
			// when the unit is Dying, because application/charm upgrades
			// should still be allowed to apply to dying units, so
			// that bugs in departed/broken hooks can be addressed at
			// runtime.
			if notDead, err := isNotDeadWithSession(units, u.doc.DocID); err != nil {
				return nil, errors.Trace(err)
			} else if !notDead {
				return nil, stateerrors.ErrDead
			}
		}
		sel := bson.D{{"_id", u.doc.DocID}, {"charmurl", curl}}
		if count, err := units.Find(sel).Count(); err != nil {
			return nil, errors.Trace(err)
		} else if count == 1 {
			// Already set
			return nil, jujutxn.ErrNoOperations
		}
		if count, err := charms.FindId(curl).Count(); err != nil {
			return nil, errors.Trace(err)
		} else if count < 1 {
			return nil, errors.Errorf("unknown charm url %q", curl)
		}

		// Add a reference to the application settings for the new charm.
		incOps, err := appCharmIncRefOps(u.st, u.doc.Application, &curl, false)
		if err != nil {
			return nil, errors.Trace(err)
		}

		// Set the new charm URL.
		differentCharm := bson.D{{"charmurl", bson.D{{"$ne", curl}}}}
		ops := append(incOps,
			txn.Op{
				C:      unitsC,
				Id:     u.doc.DocID,
				Assert: append(notDeadDoc, differentCharm...),
				Update: bson.D{{"$set", bson.D{{"charmurl", curl}}}},
			})

		unitCURL := u.doc.CharmURL
		if unitCURL != nil {
			// Drop the reference to the old charm.
			// Since we can force this now, let's; there is no point hanging on to the old charm.
			op := &ForcedOperation{Force: true}
			decOps, err := appCharmDecRefOps(u.st, u.doc.Application, unitCURL, true, op)
			if err != nil {
				// No need to stop further processing if the old key could not be removed.
				logger.Errorf("could not remove old charm references for %s: %v", unitCURL, err)
			}
			if len(op.Errors) != 0 {
				logger.Errorf("could not remove old charm references for %s: %v", unitCURL, op.Errors)
			}
			ops = append(ops, decOps...)
		}
		return ops, nil
	}
	err := u.st.db().Run(buildTxn)
	if err == nil {
		u.doc.CharmURL = &curl
	}
	return err
}

// charm returns the charm for the unit, or the application's charm if the
// unit's charm has not been set yet.
func (u *Unit) charm() (*Charm, error) {
	cURL := u.CharmURL()
	if cURL == nil {
		app, err := u.Application()
		if err != nil {
			return nil, err
		}
		cURL, _ = app.CharmURL()
	}

	if cURL == nil {
		return nil, errors.Errorf("missing charm URL for %q", u.Name())
	}
	ch, err := u.st.Charm(*cURL)
	return ch, errors.Annotatef(err, "getting charm for %s", u)
}

// assertCharmOps returns txn.Ops to assert the current charm of the unit.
// If the unit currently has no charm URL set, then the application's charm
// URL will be checked by the txn.Ops also.
func (u *Unit) assertCharmOps(ch *Charm) []txn.Op {
	ops := []txn.Op{{
		C:      unitsC,
		Id:     u.doc.Name,
		Assert: bson.D{{"charmurl", u.doc.CharmURL}},
	}}
	if u.doc.CharmURL != nil {
		appName := u.ApplicationName()
		ops = append(ops, txn.Op{
			C:      applicationsC,
			Id:     appName,
			Assert: bson.D{{"charmurl", ch.URL()}},
		})
	}
	return ops
}
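// exampleUpgradeCharm is an illustrative sketch, not part of the original
// file: pointing a unit at a new charm revision during an upgrade and
// recognising the dead-unit sentinel. The charm URL literal is hypothetical.
func exampleUpgradeCharm(u *Unit) error {
	if err := u.SetCharmURL("ch:amd64/jammy/mysql-42"); err == stateerrors.ErrDead {
		// The unit died before the charm URL could be recorded; nothing to do.
		return nil
	} else if err != nil {
		return errors.Trace(err)
	}
	return nil
}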
1633 func (u *Unit) Tag() names.Tag { 1634 return u.UnitTag() 1635 } 1636 1637 // UnitTag returns a names.UnitTag representing this Unit, unless the 1638 // unit Name is invalid, in which case it will panic 1639 func (u *Unit) UnitTag() names.UnitTag { 1640 return names.NewUnitTag(u.Name()) 1641 } 1642 1643 func unitNotAssignedError(u *Unit) error { 1644 msg := fmt.Sprintf("unit %q is not assigned to a machine", u) 1645 return errors.NewNotAssigned(nil, msg) 1646 } 1647 1648 // AssignedMachineId returns the id of the assigned machine. 1649 func (u *Unit) AssignedMachineId() (id string, err error) { 1650 if u.doc.MachineId == "" { 1651 return "", unitNotAssignedError(u) 1652 } 1653 return u.doc.MachineId, nil 1654 } 1655 1656 var ( 1657 machineNotCleanErr = errors.New("machine is dirty") 1658 alreadyAssignedErr = errors.New("unit is already assigned to a machine") 1659 inUseErr = errors.New("machine is not unused") 1660 ) 1661 1662 // assignToMachine is the internal version of AssignToMachine. 1663 func (u *Unit) assignToMachine(m *Machine, unused bool) (err error) { 1664 buildTxn := func(attempt int) ([]txn.Op, error) { 1665 u, m := u, m // don't change outer vars 1666 if attempt > 0 { 1667 var err error 1668 u, err = u.st.Unit(u.Name()) 1669 if err != nil { 1670 return nil, errors.Trace(err) 1671 } 1672 m, err = u.st.Machine(m.Id()) 1673 if err != nil { 1674 return nil, errors.Trace(err) 1675 } 1676 } 1677 return u.assignToMachineOps(m, unused) 1678 } 1679 if err := u.st.db().Run(buildTxn); err != nil { 1680 return errors.Trace(err) 1681 } 1682 u.doc.MachineId = m.doc.Id 1683 m.doc.Clean = false 1684 m.doc.Principals = append(m.doc.Principals, u.doc.Name) 1685 sort.Strings(m.doc.Principals) 1686 return nil 1687 } 1688 1689 // assignToMachineOps returns txn.Ops to assign a unit to a machine. 1690 // assignToMachineOps returns specific errors in some cases: 1691 // - machineNotAliveErr when the machine is not alive. 1692 // - unitNotAliveErr when the unit is not alive. 1693 // - alreadyAssignedErr when the unit has already been assigned 1694 // - inUseErr when the machine already has a unit assigned (if unused is true) 1695 func (u *Unit) assignToMachineOps(m *Machine, unused bool) ([]txn.Op, error) { 1696 if u.Life() != Alive { 1697 return nil, unitNotAliveErr 1698 } 1699 if u.doc.MachineId != "" { 1700 if u.doc.MachineId != m.Id() { 1701 return nil, alreadyAssignedErr 1702 } 1703 return nil, jujutxn.ErrNoOperations 1704 } 1705 if unused && !m.doc.Clean { 1706 return nil, inUseErr 1707 } 1708 storageParams, err := u.storageParams() 1709 if err != nil { 1710 return nil, errors.Trace(err) 1711 } 1712 sb, err := NewStorageBackend(u.st) 1713 if err != nil { 1714 return nil, errors.Trace(err) 1715 } 1716 storagePools, err := storagePools(sb, storageParams) 1717 if err != nil { 1718 return nil, errors.Trace(err) 1719 } 1720 if err := validateUnitMachineAssignment( 1721 m, u.doc.Base, u.doc.Principal != "", storagePools, 1722 ); err != nil { 1723 return nil, errors.Trace(err) 1724 } 1725 storageOps, volumesAttached, filesystemsAttached, err := sb.hostStorageOps(m.doc.Id, storageParams) 1726 if err != nil { 1727 return nil, errors.Trace(err) 1728 } 1729 // addMachineStorageAttachmentsOps will add a txn.Op that ensures 1730 // that no filesystems were concurrently added to the machine if 1731 // any of the filesystems being attached specify a location. 
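	// (Without that assert, a concurrent attachment could race this
	// transaction for the same static mount location on the machine.)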
1732 attachmentOps, err := addMachineStorageAttachmentsOps( 1733 m, volumesAttached, filesystemsAttached, 1734 ) 1735 if err != nil { 1736 return nil, errors.Trace(err) 1737 } 1738 storageOps = append(storageOps, attachmentOps...) 1739 1740 assert := append(isAliveDoc, bson.D{{ 1741 // The unit's subordinates must not change while we're 1742 // assigning it to a machine, to ensure machine storage 1743 // is created for subordinate units. 1744 "subordinates", u.doc.Subordinates, 1745 }, { 1746 "$or", []bson.D{ 1747 {{"machineid", ""}}, 1748 {{"machineid", m.Id()}}, 1749 }, 1750 }}...) 1751 massert := append(isAliveDoc, bson.D{{ 1752 // The machine must be able to accept a unit. 1753 "jobs", bson.M{"$in": []MachineJob{JobHostUnits}}, 1754 }}...) 1755 if unused { 1756 massert = append(massert, bson.D{{"clean", bson.D{{"$ne", false}}}}...) 1757 } 1758 ops := []txn.Op{{ 1759 C: unitsC, 1760 Id: u.doc.DocID, 1761 Assert: assert, 1762 Update: bson.D{{"$set", bson.D{{"machineid", m.doc.Id}}}}, 1763 }, { 1764 C: machinesC, 1765 Id: m.doc.DocID, 1766 Assert: massert, 1767 Update: bson.D{{"$addToSet", bson.D{{"principals", u.doc.Name}}}, {"$set", bson.D{{"clean", false}}}}, 1768 }, 1769 removeStagedAssignmentOp(u.doc.DocID), 1770 } 1771 ops = append(ops, storageOps...) 1772 return ops, nil 1773 } 1774 1775 // validateUnitMachineAssignment validates the parameters for assigning a unit 1776 // to a specified machine. 1777 func validateUnitMachineAssignment( 1778 m *Machine, 1779 base Base, 1780 isSubordinate bool, 1781 storagePools set.Strings, 1782 ) (err error) { 1783 if m.Life() != Alive { 1784 return machineNotAliveErr 1785 } 1786 if isSubordinate { 1787 return fmt.Errorf("unit is a subordinate") 1788 } 1789 if !base.compatibleWith(m.doc.Base) { 1790 return fmt.Errorf("base does not match: unit has %q, machine has %q", base.DisplayString(), m.doc.Base.DisplayString()) 1791 } 1792 canHost := false 1793 for _, j := range m.doc.Jobs { 1794 if j == JobHostUnits { 1795 canHost = true 1796 break 1797 } 1798 } 1799 if !canHost { 1800 return fmt.Errorf("machine %q cannot host units", m) 1801 } 1802 sb, err := NewStorageBackend(m.st) 1803 if err != nil { 1804 return errors.Trace(err) 1805 } 1806 if err := validateDynamicMachineStoragePools(sb, m, storagePools); err != nil { 1807 return errors.Trace(err) 1808 } 1809 return nil 1810 } 1811 1812 // validateDynamicMachineStorageParams validates that the provided machine 1813 // storage parameters are compatible with the specified machine. 1814 func validateDynamicMachineStorageParams(m *Machine, params *storageParams) error { 1815 sb, err := NewStorageBackend(m.st) 1816 if err != nil { 1817 return errors.Trace(err) 1818 } 1819 pools, err := storagePools(sb, params) 1820 if err != nil { 1821 return err 1822 } 1823 if err := validateDynamicMachineStoragePools(sb, m, pools); err != nil { 1824 return err 1825 } 1826 // Validate the volume/filesystem attachments for the machine. 
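	// Non-detachable storage lives and dies with the host it was
	// created on, so an attachment is only valid if that host is
	// the machine being validated here.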
1827 	for volumeTag := range params.volumeAttachments {
1828 		volume, err := getVolumeByTag(sb.mb, volumeTag)
1829 		if err != nil {
1830 			return errors.Trace(err)
1831 		}
1832 		if !volume.Detachable() && volume.doc.HostId != m.Id() {
1833 			return errors.Errorf(
1834 				"storage is non-detachable (bound to machine %s)",
1835 				volume.doc.HostId,
1836 			)
1837 		}
1838 	}
1839 	for filesystemTag := range params.filesystemAttachments {
1840 		filesystem, err := getFilesystemByTag(sb.mb, filesystemTag)
1841 		if err != nil {
1842 			return errors.Trace(err)
1843 		}
1844 		if !filesystem.Detachable() && filesystem.doc.HostId != m.Id() {
1845 			host := storageAttachmentHost(filesystem.doc.HostId)
1846 			return errors.Errorf(
1847 				"storage is non-detachable (bound to %s)",
1848 				names.ReadableString(host),
1849 			)
1850 		}
1851 	}
1852 	return nil
1853 }
1854 
1855 // storagePools returns the names of the storage pools used by each of the
1856 // volumes, filesystems and attachments in the machine storage parameters.
1857 func storagePools(sb *storageBackend, params *storageParams) (set.Strings, error) {
1858 	pools := make(set.Strings)
1859 	for _, v := range params.volumes {
1860 		v, err := sb.volumeParamsWithDefaults(v.Volume)
1861 		if err != nil {
1862 			return nil, errors.Trace(err)
1863 		}
1864 		pools.Add(v.Pool)
1865 	}
1866 	for _, f := range params.filesystems {
1867 		f, err := sb.filesystemParamsWithDefaults(f.Filesystem)
1868 		if err != nil {
1869 			return nil, errors.Trace(err)
1870 		}
1871 		pools.Add(f.Pool)
1872 	}
1873 	for volumeTag := range params.volumeAttachments {
1874 		volume, err := sb.Volume(volumeTag)
1875 		if err != nil {
1876 			return nil, errors.Trace(err)
1877 		}
1878 		if params, ok := volume.Params(); ok {
1879 			pools.Add(params.Pool)
1880 		} else {
1881 			info, err := volume.Info()
1882 			if err != nil {
1883 				return nil, errors.Trace(err)
1884 			}
1885 			pools.Add(info.Pool)
1886 		}
1887 	}
1888 	for filesystemTag := range params.filesystemAttachments {
1889 		filesystem, err := sb.Filesystem(filesystemTag)
1890 		if err != nil {
1891 			return nil, errors.Trace(err)
1892 		}
1893 		if params, ok := filesystem.Params(); ok {
1894 			pools.Add(params.Pool)
1895 		} else {
1896 			info, err := filesystem.Info()
1897 			if err != nil {
1898 				return nil, errors.Trace(err)
1899 			}
1900 			pools.Add(info.Pool)
1901 		}
1902 	}
1903 	return pools, nil
1904 }
1905 
1906 // validateDynamicMachineStoragePools validates that all of the specified
1907 // storage pools support dynamic storage provisioning. If any provider doesn't
1908 // support dynamic storage, then an IsNotSupported error is returned.
1909 func validateDynamicMachineStoragePools(sb *storageBackend, m *Machine, pools set.Strings) error {
1910 	if pools.IsEmpty() {
1911 		return nil
1912 	}
1913 	if m.ContainerType() != "" {
1914 		// TODO(axw) consult storage providers to check if they
1915 		// support adding storage to containers. Loop is fine,
1916 		// for example.
1917 		//
1918 		// TODO(axw) later we might allow *any* storage, and
1919 		// passthrough/bindmount storage. That would imply either
1920 		// container creation time only, or requiring containers
1921 		// to be restarted to pick up new configuration.
1922 		return errors.NotSupportedf("adding storage to %s container", m.ContainerType())
1923 	}
1924 	return validateDynamicStoragePools(sb, pools)
1925 }
1926 
1927 // validateDynamicStoragePools validates that all of the specified storage
1928 // providers support dynamic storage provisioning. If any provider doesn't
1929 // support dynamic storage, then an IsNotSupported error is returned.
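//
// An illustrative call (the pool names here are assumptions and depend on
// the storage providers registered for the model):
//
//	pools := set.NewStrings("ebs", "loop")
//	err := validateDynamicStoragePools(sb, pools)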
1930 func validateDynamicStoragePools(sb *storageBackend, pools set.Strings) error { 1931 for pool := range pools { 1932 providerType, provider, _, err := poolStorageProvider(sb, pool) 1933 if err != nil { 1934 return errors.Trace(err) 1935 } 1936 if !provider.Dynamic() { 1937 return errors.NewNotSupported(err, fmt.Sprintf( 1938 "%q storage provider does not support dynamic storage", 1939 providerType, 1940 )) 1941 } 1942 } 1943 return nil 1944 } 1945 1946 func assignContextf(err *error, unitName string, target string) { 1947 if *err != nil { 1948 *err = errors.Annotatef(*err, 1949 "cannot assign unit %q to %s", 1950 unitName, target, 1951 ) 1952 } 1953 } 1954 1955 // AssignToMachine assigns this unit to a given machine. 1956 func (u *Unit) AssignToMachine(m *Machine) (err error) { 1957 defer assignContextf(&err, u.Name(), fmt.Sprintf("machine %s", m)) 1958 if u.doc.Principal != "" { 1959 return fmt.Errorf("unit is a subordinate") 1960 } 1961 return u.assignToMachine(m, false) 1962 } 1963 1964 // assignToNewMachineOps returns txn.Ops to assign the unit to a machine 1965 // created according to the supplied params, with the supplied constraints. 1966 func (u *Unit) assignToNewMachineOps( 1967 template MachineTemplate, 1968 parentId string, 1969 containerType instance.ContainerType, 1970 ) (*Machine, []txn.Op, error) { 1971 1972 if u.Life() != Alive { 1973 return nil, nil, unitNotAliveErr 1974 } 1975 if u.doc.MachineId != "" { 1976 return nil, nil, alreadyAssignedErr 1977 } 1978 1979 template.principals = []string{u.doc.Name} 1980 template.Dirty = true 1981 1982 var ( 1983 mdoc *machineDoc 1984 ops []txn.Op 1985 err error 1986 ) 1987 switch { 1988 case parentId == "" && containerType == "": 1989 mdoc, ops, err = u.st.addMachineOps(template) 1990 case parentId == "": 1991 if containerType == "" { 1992 return nil, nil, errors.New("assignToNewMachine called without container type (should never happen)") 1993 } 1994 // The new parent machine is clean and only hosts units, 1995 // regardless of its child. 1996 parentParams := template 1997 parentParams.Jobs = []MachineJob{JobHostUnits} 1998 mdoc, ops, err = u.st.addMachineInsideNewMachineOps(template, parentParams, containerType) 1999 default: 2000 mdoc, ops, err = u.st.addMachineInsideMachineOps(template, parentId, containerType) 2001 } 2002 if err != nil { 2003 return nil, nil, err 2004 } 2005 2006 // Ensure the host machine is really clean. 2007 if parentId != "" { 2008 mparent, err := u.st.Machine(parentId) 2009 if err != nil { 2010 return nil, nil, err 2011 } 2012 if !mparent.Clean() { 2013 return nil, nil, machineNotCleanErr 2014 } 2015 containers, err := mparent.Containers() 2016 if err != nil { 2017 return nil, nil, err 2018 } 2019 if len(containers) > 0 { 2020 return nil, nil, machineNotCleanErr 2021 } 2022 parentDocId := u.st.docID(parentId) 2023 ops = append(ops, txn.Op{ 2024 C: machinesC, 2025 Id: parentDocId, 2026 Assert: bson.D{{"clean", true}}, 2027 }, txn.Op{ 2028 C: containerRefsC, 2029 Id: parentDocId, 2030 Assert: bson.D{hasNoContainersTerm}, 2031 }) 2032 } 2033 2034 // The unit's subordinates must not change while we're 2035 // assigning it to a machine, to ensure machine storage 2036 // is created for subordinate units. 2037 subordinatesUnchanged := bson.D{{"subordinates", u.doc.Subordinates}} 2038 isUnassigned := bson.D{{"machineid", ""}} 2039 asserts := append(isAliveDoc, isUnassigned...) 2040 asserts = append(asserts, subordinatesUnchanged...) 
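	// If any of these asserts fail (the unit died, was assigned
	// elsewhere, or its subordinates changed), the transaction aborts
	// and the caller's buildTxn closure recomputes the ops from fresh
	// documents.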
2041 2042 ops = append(ops, txn.Op{ 2043 C: unitsC, 2044 Id: u.doc.DocID, 2045 Assert: asserts, 2046 Update: bson.D{{"$set", bson.D{{"machineid", mdoc.Id}}}}, 2047 }, 2048 removeStagedAssignmentOp(u.doc.DocID), 2049 ) 2050 return &Machine{u.st, *mdoc}, ops, nil 2051 } 2052 2053 // Constraints returns the unit's deployment constraints. 2054 func (u *Unit) Constraints() (*constraints.Value, error) { 2055 cons, err := readConstraints(u.st, u.globalAgentKey()) 2056 if errors.IsNotFound(err) { 2057 // Lack of constraints indicates lack of unit. 2058 return nil, errors.NotFoundf("unit") 2059 } else if err != nil { 2060 return nil, err 2061 } 2062 if !cons.HasArch() && !cons.HasInstanceType() { 2063 app, err := u.Application() 2064 if err != nil { 2065 return nil, errors.Trace(err) 2066 } 2067 if origin := app.CharmOrigin(); origin != nil && origin.Platform != nil { 2068 if origin.Platform.Architecture != "" { 2069 cons.Arch = &origin.Platform.Architecture 2070 } 2071 } 2072 if !cons.HasArch() { 2073 a := constraints.ArchOrDefault(cons, nil) 2074 cons.Arch = &a 2075 } 2076 } 2077 return &cons, nil 2078 } 2079 2080 // AssignToNewMachineOrContainer assigns the unit to a new machine, 2081 // with constraints determined according to the application and 2082 // model constraints at the time of unit creation. If a 2083 // container is required, a clean, empty machine instance is required 2084 // on which to create the container. An existing clean, empty instance 2085 // is first searched for, and if not found, a new one is created. 2086 func (u *Unit) AssignToNewMachineOrContainer() (err error) { 2087 defer assignContextf(&err, u.Name(), "new machine or container") 2088 if u.doc.Principal != "" { 2089 return fmt.Errorf("unit is a subordinate") 2090 } 2091 cons, err := u.Constraints() 2092 if err != nil { 2093 return err 2094 } 2095 if !cons.HasContainer() { 2096 return u.AssignToNewMachine() 2097 } 2098 2099 // Find a clean, empty machine on which to create a container. 2100 hostCons := *cons 2101 noContainer := instance.NONE 2102 hostCons.Container = &noContainer 2103 query, err := u.findCleanMachineQuery(true, &hostCons) 2104 if err != nil { 2105 return err 2106 } 2107 machinesCollection, closer := u.st.db().GetCollection(machinesC) 2108 defer closer() 2109 var host machineDoc 2110 if err := machinesCollection.Find(query).One(&host); err == mgo.ErrNotFound { 2111 // No existing clean, empty machine so create a new one. The 2112 // container constraint will be used by AssignToNewMachine to 2113 // create the required container. 2114 return u.AssignToNewMachine() 2115 } else if err != nil { 2116 return err 2117 } 2118 2119 var m *Machine 2120 buildTxn := func(attempt int) ([]txn.Op, error) { 2121 var err error 2122 u := u // don't change outer var 2123 if attempt > 0 { 2124 u, err = u.st.Unit(u.Name()) 2125 if err != nil { 2126 return nil, errors.Trace(err) 2127 } 2128 } 2129 template := MachineTemplate{ 2130 Base: u.doc.Base, 2131 Constraints: *cons, 2132 Jobs: []MachineJob{JobHostUnits}, 2133 } 2134 var ops []txn.Op 2135 m, ops, err = u.assignToNewMachineOps(template, host.Id, *cons.Container) 2136 return ops, err 2137 } 2138 if err := u.st.db().Run(buildTxn); err != nil { 2139 if errors.Cause(err) == machineNotCleanErr { 2140 // The clean machine was used before we got a chance 2141 // to use it so just stick the unit on a new machine. 
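			// (assignToNewMachineOps returns machineNotCleanErr when the
			// chosen host machine fails its clean/no-containers asserts.)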
2142 return u.AssignToNewMachine() 2143 } 2144 return errors.Trace(err) 2145 } 2146 u.doc.MachineId = m.doc.Id 2147 return nil 2148 } 2149 2150 // AssignToNewMachine assigns the unit to a new machine, with constraints 2151 // determined according to the application and model constraints at the 2152 // time of unit creation. 2153 func (u *Unit) AssignToNewMachine() (err error) { 2154 defer assignContextf(&err, u.Name(), "new machine") 2155 return u.assignToNewMachine("") 2156 } 2157 2158 // assignToNewMachine assigns the unit to a new machine with the 2159 // optional placement directive, with constraints determined according 2160 // to the application and model constraints at the time of unit creation. 2161 func (u *Unit) assignToNewMachine(placement string) error { 2162 if u.doc.Principal != "" { 2163 return fmt.Errorf("unit is a subordinate") 2164 } 2165 var m *Machine 2166 buildTxn := func(attempt int) ([]txn.Op, error) { 2167 var err error 2168 u := u // don't change outer var 2169 if attempt > 0 { 2170 u, err = u.st.Unit(u.Name()) 2171 if err != nil { 2172 return nil, errors.Trace(err) 2173 } 2174 } 2175 cons, err := u.Constraints() 2176 if err != nil { 2177 return nil, err 2178 } 2179 var containerType instance.ContainerType 2180 if cons.HasContainer() { 2181 containerType = *cons.Container 2182 } 2183 storageParams, err := u.storageParams() 2184 if err != nil { 2185 return nil, errors.Trace(err) 2186 } 2187 template := MachineTemplate{ 2188 Base: u.doc.Base, 2189 Constraints: *cons, 2190 Jobs: []MachineJob{JobHostUnits}, 2191 Placement: placement, 2192 Dirty: placement != "", 2193 Volumes: storageParams.volumes, 2194 VolumeAttachments: storageParams.volumeAttachments, 2195 Filesystems: storageParams.filesystems, 2196 FilesystemAttachments: storageParams.filesystemAttachments, 2197 } 2198 // Get the ops necessary to create a new machine, and the 2199 // machine doc that will be added with those operations 2200 // (which includes the machine id). 2201 var ops []txn.Op 2202 m, ops, err = u.assignToNewMachineOps(template, "", containerType) 2203 return ops, err 2204 } 2205 if err := u.st.db().Run(buildTxn); err != nil { 2206 return errors.Trace(err) 2207 } 2208 u.doc.MachineId = m.doc.Id 2209 return nil 2210 } 2211 2212 type byStorageInstance []StorageAttachment 2213 2214 func (b byStorageInstance) Len() int { return len(b) } 2215 func (b byStorageInstance) Swap(i, j int) { b[i], b[j] = b[j], b[i] } 2216 2217 func (b byStorageInstance) Less(i, j int) bool { 2218 return b[i].StorageInstance().String() < b[j].StorageInstance().String() 2219 } 2220 2221 // storageParams returns parameters for creating volumes/filesystems 2222 // and volume/filesystem attachments when a unit is instantiated. 
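// The result includes the storage parameters of the unit's subordinates,
// since storage for subordinate units is created on the principal's machine.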
2223 func (u *Unit) storageParams() (*storageParams, error) { 2224 params, err := unitStorageParams(u) 2225 if err != nil { 2226 return nil, errors.Trace(err) 2227 } 2228 for _, name := range u.doc.Subordinates { 2229 sub, err := u.st.Unit(name) 2230 if err != nil { 2231 return nil, errors.Trace(err) 2232 } 2233 subParams, err := unitStorageParams(sub) 2234 if err != nil { 2235 return nil, errors.Trace(err) 2236 } 2237 params = combineStorageParams(params, subParams) 2238 } 2239 return params, nil 2240 } 2241 2242 func unitStorageParams(u *Unit) (*storageParams, error) { 2243 sb, err := NewStorageBackend(u.st) 2244 if err != nil { 2245 return nil, errors.Trace(err) 2246 } 2247 storageAttachments, err := sb.UnitStorageAttachments(u.UnitTag()) 2248 if err != nil { 2249 return nil, errors.Annotate(err, "getting storage attachments") 2250 } 2251 ch, err := u.charm() 2252 if err != nil { 2253 return nil, errors.Annotate(err, "getting charm") 2254 } 2255 2256 // Sort storage attachments so the volume ids are consistent (for testing). 2257 sort.Sort(byStorageInstance(storageAttachments)) 2258 2259 var storageInstances []*storageInstance 2260 for _, storageAttachment := range storageAttachments { 2261 storage, err := sb.storageInstance(storageAttachment.StorageInstance()) 2262 if err != nil { 2263 return nil, errors.Annotatef(err, "getting storage instance") 2264 } 2265 storageInstances = append(storageInstances, storage) 2266 } 2267 return storageParamsForUnit(sb, storageInstances, u.UnitTag(), u.Base(), ch.Meta()) 2268 } 2269 2270 func storageParamsForUnit( 2271 sb *storageBackend, storageInstances []*storageInstance, tag names.UnitTag, base Base, chMeta *charm.Meta, 2272 ) (*storageParams, error) { 2273 2274 var volumes []HostVolumeParams 2275 var filesystems []HostFilesystemParams 2276 volumeAttachments := make(map[names.VolumeTag]VolumeAttachmentParams) 2277 filesystemAttachments := make(map[names.FilesystemTag]FilesystemAttachmentParams) 2278 for _, storage := range storageInstances { 2279 storageParams, err := storageParamsForStorageInstance( 2280 sb, chMeta, base.OS, storage, 2281 ) 2282 if err != nil { 2283 return nil, errors.Trace(err) 2284 } 2285 2286 volumes = append(volumes, storageParams.volumes...) 2287 for k, v := range storageParams.volumeAttachments { 2288 volumeAttachments[k] = v 2289 } 2290 2291 filesystems = append(filesystems, storageParams.filesystems...) 2292 for k, v := range storageParams.filesystemAttachments { 2293 filesystemAttachments[k] = v 2294 } 2295 } 2296 result := &storageParams{ 2297 volumes, 2298 volumeAttachments, 2299 filesystems, 2300 filesystemAttachments, 2301 } 2302 return result, nil 2303 } 2304 2305 // storageParamsForStorageInstance returns parameters for creating 2306 // volumes/filesystems and volume/filesystem attachments for a host that 2307 // the unit will be assigned to. These parameters are based on a given storage 2308 // instance. 
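// For volume-backed filesystems, parameters for attaching the backing
// volume are included as well (note the fallthrough from
// StorageKindFilesystem to StorageKindBlock below).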
2309 func storageParamsForStorageInstance( 2310 sb *storageBackend, 2311 charmMeta *charm.Meta, 2312 osName string, 2313 storage *storageInstance, 2314 ) (*storageParams, error) { 2315 2316 charmStorage := charmMeta.Storage[storage.StorageName()] 2317 2318 var volumes []HostVolumeParams 2319 var filesystems []HostFilesystemParams 2320 volumeAttachments := make(map[names.VolumeTag]VolumeAttachmentParams) 2321 filesystemAttachments := make(map[names.FilesystemTag]FilesystemAttachmentParams) 2322 2323 switch storage.Kind() { 2324 case StorageKindFilesystem: 2325 location, err := FilesystemMountPoint(charmStorage, storage.StorageTag(), osName) 2326 if err != nil { 2327 return nil, errors.Annotatef( 2328 err, "getting filesystem mount point for storage %s", 2329 storage.StorageName(), 2330 ) 2331 } 2332 filesystemAttachmentParams := FilesystemAttachmentParams{ 2333 locationAutoGenerated: charmStorage.Location == "", // auto-generated location 2334 Location: location, 2335 ReadOnly: charmStorage.ReadOnly, 2336 } 2337 var volumeBacked bool 2338 if filesystem, err := sb.StorageInstanceFilesystem(storage.StorageTag()); err == nil { 2339 // The filesystem already exists, so just attach it. 2340 // When creating ops to attach the storage to the 2341 // machine, we will check if the attachment already 2342 // exists, and whether the storage can be attached to 2343 // the machine. 2344 if !charmStorage.Shared { 2345 // The storage is not shared, so make sure that it is 2346 // not currently attached to any other machine. If it 2347 // is, it should be in the process of being detached. 2348 existing, err := sb.FilesystemAttachments(filesystem.FilesystemTag()) 2349 if err != nil { 2350 return nil, errors.Trace(err) 2351 } 2352 if len(existing) > 0 { 2353 return nil, errors.Errorf( 2354 "%s is attached to %s", 2355 names.ReadableString(filesystem.FilesystemTag()), 2356 names.ReadableString(existing[0].Host()), 2357 ) 2358 } 2359 } 2360 filesystemAttachments[filesystem.FilesystemTag()] = filesystemAttachmentParams 2361 if _, err := filesystem.Volume(); err == nil { 2362 // The filesystem is volume-backed, so make sure we attach the volume too. 2363 volumeBacked = true 2364 } 2365 } else if errors.IsNotFound(err) { 2366 filesystemParams := FilesystemParams{ 2367 storage: storage.StorageTag(), 2368 Pool: storage.doc.Constraints.Pool, 2369 Size: storage.doc.Constraints.Size, 2370 } 2371 filesystems = append(filesystems, HostFilesystemParams{ 2372 filesystemParams, filesystemAttachmentParams, 2373 }) 2374 } else { 2375 return nil, errors.Annotatef(err, "getting filesystem for storage %q", storage.Tag().Id()) 2376 } 2377 2378 if !volumeBacked { 2379 break 2380 } 2381 // Fall through to attach the volume that backs the filesystem. 2382 fallthrough 2383 2384 case StorageKindBlock: 2385 volumeAttachmentParams := VolumeAttachmentParams{ 2386 charmStorage.ReadOnly, 2387 } 2388 if volume, err := sb.StorageInstanceVolume(storage.StorageTag()); err == nil { 2389 // The volume already exists, so just attach it. When 2390 // creating ops to attach the storage to the machine, 2391 // we will check if the attachment already exists, and 2392 // whether the storage can be attached to the machine. 2393 if !charmStorage.Shared { 2394 // The storage is not shared, so make sure that it is 2395 // not currently attached to any other machine. If it 2396 // is, it should be in the process of being detached. 
2397 existing, err := sb.VolumeAttachments(volume.VolumeTag()) 2398 if err != nil { 2399 return nil, errors.Trace(err) 2400 } 2401 if len(existing) > 0 { 2402 return nil, errors.Errorf( 2403 "%s is attached to %s", 2404 names.ReadableString(volume.VolumeTag()), 2405 names.ReadableString(existing[0].Host()), 2406 ) 2407 } 2408 } 2409 volumeAttachments[volume.VolumeTag()] = volumeAttachmentParams 2410 } else if errors.IsNotFound(err) { 2411 volumeParams := VolumeParams{ 2412 storage: storage.StorageTag(), 2413 Pool: storage.doc.Constraints.Pool, 2414 Size: storage.doc.Constraints.Size, 2415 } 2416 volumes = append(volumes, HostVolumeParams{ 2417 volumeParams, volumeAttachmentParams, 2418 }) 2419 } else { 2420 return nil, errors.Annotatef(err, "getting volume for storage %q", storage.Tag().Id()) 2421 } 2422 default: 2423 return nil, errors.Errorf("invalid storage kind %v", storage.Kind()) 2424 } 2425 result := &storageParams{ 2426 volumes, 2427 volumeAttachments, 2428 filesystems, 2429 filesystemAttachments, 2430 } 2431 return result, nil 2432 } 2433 2434 var noCleanMachines = errors.New("all eligible machines in use") 2435 2436 // AssignToCleanMachine assigns u to a machine which is marked as clean. A machine 2437 // is clean if it has never had any principal units assigned to it. 2438 // If there are no clean machines besides any machine(s) running JobHostEnviron, 2439 // an error is returned. 2440 // This method does not take constraints into consideration when choosing a 2441 // machine (lp:1161919). 2442 func (u *Unit) AssignToCleanMachine() (m *Machine, err error) { 2443 return u.assignToCleanMaybeEmptyMachine(false) 2444 } 2445 2446 // AssignToCleanEmptyMachine assigns u to a machine which is marked as clean and is also 2447 // not hosting any containers. A machine is clean if it has never had any principal units 2448 // assigned to it. If there are no clean machines besides any machine(s) running JobHostEnviron, 2449 // an error is returned. 2450 // This method does not take constraints into consideration when choosing a 2451 // machine (lp:1161919). 2452 func (u *Unit) AssignToCleanEmptyMachine() (m *Machine, err error) { 2453 return u.assignToCleanMaybeEmptyMachine(true) 2454 } 2455 2456 var hasContainerTerm = bson.DocElem{ 2457 "$and", []bson.D{ 2458 {{"children", bson.D{{"$not", bson.D{{"$size", 0}}}}}}, 2459 {{"children", bson.D{{"$exists", true}}}}, 2460 }} 2461 2462 var hasNoContainersTerm = bson.DocElem{ 2463 "$or", []bson.D{ 2464 {{"children", bson.D{{"$size", 0}}}}, 2465 {{"children", bson.D{{"$exists", false}}}}, 2466 }} 2467 2468 // findCleanMachineQuery returns a Mongo query to find clean (and maybe empty) 2469 // machines with characteristics matching the specified constraints. 2470 func (u *Unit) findCleanMachineQuery(requireEmpty bool, cons *constraints.Value) (bson.D, error) { 2471 db, dbCloser := u.st.newDB() 2472 defer dbCloser() 2473 2474 // Select all machines that can accept principal units and are clean. 2475 var containerRefs []machineContainers 2476 // If we need empty machines, first build up a list of machine ids which 2477 // have containers so we can exclude those. 
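	// (hasContainerTerm above matches machines whose children list
	// exists and is non-empty.)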
2478 if requireEmpty { 2479 containerRefsCollection, cCloser := db.GetCollection(containerRefsC) 2480 defer cCloser() 2481 2482 err := containerRefsCollection.Find(bson.D{hasContainerTerm}).All(&containerRefs) 2483 if err != nil { 2484 return nil, errors.Trace(err) 2485 } 2486 } 2487 omitMachineIds := make([]string, len(containerRefs)) 2488 for i, cref := range containerRefs { 2489 omitMachineIds[i] = cref.Id 2490 } 2491 2492 // Exclude machines that are locked for series upgrade. 2493 locked, err := u.st.upgradeSeriesMachineIds() 2494 if err != nil { 2495 return nil, errors.Trace(err) 2496 } 2497 omitMachineIds = append(omitMachineIds, locked...) 2498 2499 // Also exclude containers on machines locked for series upgrade. 2500 for _, id := range locked { 2501 m, err := u.st.Machine(id) 2502 if err != nil { 2503 return nil, errors.Trace(err) 2504 } 2505 cIds, err := m.Containers() 2506 if err != nil && !errors.IsNotFound(err) { 2507 return nil, errors.Trace(err) 2508 } 2509 omitMachineIds = append(omitMachineIds, cIds...) 2510 } 2511 2512 terms := bson.D{ 2513 {"life", Alive}, 2514 {"base", u.doc.Base}, 2515 {"jobs", []MachineJob{JobHostUnits}}, 2516 {"clean", true}, 2517 {"machineid", bson.D{{"$nin", omitMachineIds}}}, 2518 } 2519 // Add the container filter term if necessary. 2520 var containerType instance.ContainerType 2521 if cons.Container != nil { 2522 containerType = *cons.Container 2523 } 2524 if containerType == instance.NONE { 2525 terms = append(terms, bson.DocElem{"containertype", ""}) 2526 } else if containerType != "" { 2527 terms = append(terms, bson.DocElem{"containertype", string(containerType)}) 2528 } 2529 2530 // Find the ids of machines which satisfy any required hardware 2531 // constraints. If there is no instanceData for a machine, that 2532 // machine is not considered as suitable for deploying the unit. 2533 // This can happen if the machine is not yet provisioned. It may 2534 // be that when the machine is provisioned it will be found to 2535 // be suitable, but we don't know that right now and it's best 2536 // to err on the side of caution and exclude such machines. 2537 var suitableTerms bson.D 2538 if cons.HasArch() { 2539 suitableTerms = append(suitableTerms, bson.DocElem{"arch", *cons.Arch}) 2540 } 2541 if cons.HasMem() { 2542 suitableTerms = append(suitableTerms, bson.DocElem{"mem", bson.D{{"$gte", *cons.Mem}}}) 2543 } 2544 if cons.RootDisk != nil && *cons.RootDisk > 0 { 2545 suitableTerms = append(suitableTerms, bson.DocElem{"rootdisk", bson.D{{"$gte", *cons.RootDisk}}}) 2546 } 2547 if cons.RootDiskSource != nil && *cons.RootDiskSource != "" { 2548 suitableTerms = append(suitableTerms, bson.DocElem{"rootdisksource", *cons.RootDiskSource}) 2549 } 2550 if cons.HasCpuCores() { 2551 suitableTerms = append(suitableTerms, bson.DocElem{"cpucores", bson.D{{"$gte", *cons.CpuCores}}}) 2552 } 2553 if cons.HasCpuPower() { 2554 suitableTerms = append(suitableTerms, bson.DocElem{"cpupower", bson.D{{"$gte", *cons.CpuPower}}}) 2555 } 2556 if cons.Tags != nil && len(*cons.Tags) > 0 { 2557 suitableTerms = append(suitableTerms, bson.DocElem{"tags", bson.D{{"$all", *cons.Tags}}}) 2558 } 2559 if cons.HasZones() { 2560 suitableTerms = append(suitableTerms, bson.DocElem{"availzone", bson.D{{"$in", *cons.Zones}}}) 2561 } 2562 // VirtType is orthogonal to the containertype, i.e. an LXC container can 2563 // be a container or a virtual machine. Once KVM is removed, we can drop 2564 // the containertype and rely just on the virt-type. 
2565 	if cons.HasVirtType() {
2566 		suitableTerms = append(suitableTerms, bson.DocElem{"virttype", *cons.VirtType})
2567 	}
2568 	if cons.HasImageID() {
2569 		suitableTerms = append(suitableTerms, bson.DocElem{"imageid", *cons.ImageID})
2570 	}
2571 	if len(suitableTerms) > 0 {
2572 		instanceDataCollection, iCloser := db.GetCollection(instanceDataC)
2573 		defer iCloser()
2574 
2575 		var suitableInstanceData []instanceData
2576 		err := instanceDataCollection.Find(suitableTerms).Select(bson.M{"_id": 1}).All(&suitableInstanceData)
2577 		if err != nil {
2578 			return nil, err
2579 		}
2580 		var suitableIds = make([]string, len(suitableInstanceData))
2581 		for i, m := range suitableInstanceData {
2582 			suitableIds[i] = m.DocID
2583 		}
2584 		terms = append(terms, bson.DocElem{"_id", bson.D{{"$in", suitableIds}}})
2585 	}
2586 	return terms, nil
2587 }
2588 
2589 // assignToCleanMaybeEmptyMachine implements AssignToCleanMachine and AssignToCleanEmptyMachine.
2590 // A 'machine' may be a machine instance or container depending on the application constraints.
2591 func (u *Unit) assignToCleanMaybeEmptyMachine(requireEmpty bool) (_ *Machine, err error) {
2592 	context := "clean"
2593 	if requireEmpty {
2594 		context += ", empty"
2595 	}
2596 	context += " machine"
2597 	defer assignContextf(&err, u.Name(), context)
2598 
2599 	if u.doc.Principal != "" {
2600 		err = fmt.Errorf("unit is a subordinate")
2601 		return nil, err
2602 	}
2603 	var m *Machine
2604 	buildTxn := func(attempt int) ([]txn.Op, error) {
2605 		var err error
2606 		u := u // don't change outer var
2607 		if attempt > 0 {
2608 			u, err = u.st.Unit(u.Name())
2609 			if err != nil {
2610 				return nil, errors.Trace(err)
2611 			}
2612 		}
2613 		var ops []txn.Op
2614 		m, ops, err = u.assignToCleanMaybeEmptyMachineOps(requireEmpty)
2615 		return ops, err
2616 	}
2617 	if err := u.st.db().Run(buildTxn); err != nil {
2618 		return nil, errors.Trace(err)
2619 	}
2620 	u.doc.MachineId = m.doc.Id
2621 	m.doc.Clean = false
2622 	return m, nil
2623 }
2624 
2625 func (u *Unit) assignToCleanMaybeEmptyMachineOps(requireEmpty bool) (_ *Machine, _ []txn.Op, err error) {
2626 	failure := func(err error) (*Machine, []txn.Op, error) {
2627 		return nil, nil, err
2628 	}
2629 
2630 	sb, err := NewStorageBackend(u.st)
2631 	if err != nil {
2632 		return failure(err)
2633 	}
2634 
2635 	// If required storage is not all dynamic, then assigning
2636 	// to a new machine is required.
2637 	storageParams, err := u.storageParams()
2638 	if err != nil {
2639 		return failure(err)
2640 	}
2641 	storagePools, err := storagePools(sb, storageParams)
2642 	if err != nil {
2643 		return failure(err)
2644 	}
2645 	if err := validateDynamicStoragePools(sb, storagePools); err != nil {
2646 		if errors.IsNotSupported(err) {
2647 			return failure(noCleanMachines)
2648 		}
2649 		return failure(err)
2650 	}
2651 
2652 	// Get the unit constraints to see what deployment requirements we have to adhere to.
2653 	cons, err := u.Constraints()
2654 	if err != nil {
2655 		return failure(err)
2656 	}
2657 	query, err := u.findCleanMachineQuery(requireEmpty, cons)
2658 	if err != nil {
2659 		return failure(err)
2660 	}
2661 
2662 	// Find all of the candidate machines, and the associated instances
2663 	// for those that are provisioned. Provisioned machines will be used
2664 	// in preference to unprovisioned machines.
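	// (Provisioned machines go first so that distributeUnit can spread
	// the unit across availability zones using real instance IDs;
	// unprovisioned machines are appended as a fallback.)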
2666 machinesCollection, closer := u.st.db().GetCollection(machinesC) 2667 defer closer() 2668 var mdocs []*machineDoc 2669 if err := machinesCollection.Find(query).All(&mdocs); err != nil { 2670 return failure(err) 2671 } 2672 var unprovisioned []*Machine 2673 var instances []instance.Id 2674 instanceMachines := make(map[instance.Id]*Machine) 2675 for _, mdoc := range mdocs { 2676 m := newMachine(u.st, mdoc) 2677 inst, err := m.InstanceId() 2678 if errors.IsNotProvisioned(err) { 2679 unprovisioned = append(unprovisioned, m) 2680 } else if err != nil { 2681 return failure(err) 2682 } else { 2683 instances = append(instances, inst) 2684 instanceMachines[inst] = m 2685 } 2686 } 2687 2688 // Filter the list of instances that are suitable for 2689 // distribution, and then map them back to machines. 2690 // 2691 // TODO(axw) 2014-05-30 #1324904 2692 // Shuffle machines to reduce likelihood of collisions. 2693 // The partition of provisioned/unprovisioned machines 2694 // must be maintained. 2695 var limitZones []string 2696 if cons.HasZones() { 2697 limitZones = *cons.Zones 2698 } 2699 if instances, err = distributeUnit(u, instances, limitZones); err != nil { 2700 return failure(err) 2701 } 2702 machines := make([]*Machine, len(instances), len(instances)+len(unprovisioned)) 2703 for i, inst := range instances { 2704 m, ok := instanceMachines[inst] 2705 if !ok { 2706 err := fmt.Errorf("invalid instance returned: %v", inst) 2707 return failure(err) 2708 } 2709 machines[i] = m 2710 } 2711 machines = append(machines, unprovisioned...) 2712 2713 // TODO(axw) 2014-05-30 #1253704 2714 // We should not select a machine that is in the process 2715 // of being provisioned. There's no point asserting that 2716 // the machine hasn't been provisioned, as there will still 2717 // be a period of time during which the machine may be 2718 // provisioned without the fact having yet been recorded 2719 // in state. 2720 for _, m := range machines { 2721 // Check that the unit storage is compatible with 2722 // the machine in question. 2723 if err := validateDynamicMachineStorageParams(m, storageParams); err != nil { 2724 if errors.IsNotSupported(err) { 2725 continue 2726 } 2727 return failure(err) 2728 } 2729 ops, err := u.assignToMachineOps(m, true) 2730 if err == nil { 2731 return m, ops, nil 2732 } 2733 switch errors.Cause(err) { 2734 case inUseErr, machineNotAliveErr: 2735 default: 2736 return failure(err) 2737 } 2738 } 2739 return failure(noCleanMachines) 2740 } 2741 2742 // UnassignFromMachine removes the assignment between this unit and the 2743 // machine it's assigned to. 2744 func (u *Unit) UnassignFromMachine() (err error) { 2745 // TODO check local machine id and add an assert that the 2746 // machine id is as expected. 2747 ops := []txn.Op{{ 2748 C: unitsC, 2749 Id: u.doc.DocID, 2750 Assert: txn.DocExists, 2751 Update: bson.D{{"$set", bson.D{{"machineid", ""}}}}, 2752 }} 2753 if u.doc.MachineId != "" { 2754 ops = append(ops, txn.Op{ 2755 C: machinesC, 2756 Id: u.st.docID(u.doc.MachineId), 2757 Assert: txn.DocExists, 2758 Update: bson.D{{"$pull", bson.D{{"principals", u.doc.Name}}}}, 2759 }) 2760 } 2761 err = u.st.db().RunTransaction(ops) 2762 if err != nil { 2763 return fmt.Errorf("cannot unassign unit %q from machine: %v", u, onAbort(err, errors.NotFoundf("machine"))) 2764 } 2765 u.doc.MachineId = "" 2766 return nil 2767 } 2768 2769 // ActionSpecsByName is a map of action names to their respective ActionSpec. 
2770 type ActionSpecsByName map[string]charm.ActionSpec 2771 2772 // PrepareActionPayload returns the payload to use in creating an action for this unit. 2773 // Note that the use of spec.InsertDefaults mutates payload. 2774 func (u *Unit) PrepareActionPayload(name string, payload map[string]interface{}, parallel *bool, executionGroup *string) (map[string]interface{}, bool, string, error) { 2775 if len(name) == 0 { 2776 return nil, false, "", errors.New("no action name given") 2777 } 2778 2779 // If the action is predefined inside juju, get spec from map 2780 spec, ok := actions.PredefinedActionsSpec[name] 2781 if !ok { 2782 specs, err := u.ActionSpecs() 2783 if err != nil { 2784 return nil, false, "", err 2785 } 2786 spec, ok = specs[name] 2787 if !ok { 2788 return nil, false, "", errors.Errorf("action %q not defined on unit %q", name, u.Name()) 2789 } 2790 } 2791 // Reject bad payloads before attempting to insert defaults. 2792 err := spec.ValidateParams(payload) 2793 if err != nil { 2794 return nil, false, "", errors.Trace(err) 2795 } 2796 payloadWithDefaults, err := spec.InsertDefaults(payload) 2797 if err != nil { 2798 return nil, false, "", errors.Trace(err) 2799 } 2800 2801 // For k8s operators, we run the action on the operator pod by default. 2802 if _, ok := payloadWithDefaults["workload-context"]; !ok { 2803 app, err := u.Application() 2804 if err != nil { 2805 return nil, false, "", errors.Trace(err) 2806 } 2807 ch, _, err := app.Charm() 2808 if err != nil { 2809 return nil, false, "", errors.Trace(err) 2810 } 2811 if ch.Meta().Deployment != nil && ch.Meta().Deployment.DeploymentMode == charm.ModeOperator { 2812 payloadWithDefaults["workload-context"] = false 2813 } 2814 } 2815 2816 runParallel := spec.Parallel 2817 if parallel != nil { 2818 runParallel = *parallel 2819 } 2820 runExecutionGroup := spec.ExecutionGroup 2821 if executionGroup != nil { 2822 runExecutionGroup = *executionGroup 2823 } 2824 return payloadWithDefaults, runParallel, runExecutionGroup, nil 2825 } 2826 2827 // ActionSpecs gets the ActionSpec map for the Unit's charm. 2828 func (u *Unit) ActionSpecs() (ActionSpecsByName, error) { 2829 none := ActionSpecsByName{} 2830 ch, err := u.charm() 2831 if err != nil { 2832 return none, errors.Trace(err) 2833 } 2834 chActions := ch.Actions() 2835 if chActions == nil || len(chActions.ActionSpecs) == 0 { 2836 return none, errors.Errorf("no actions defined on charm %q", ch.URL()) 2837 } 2838 return chActions.ActionSpecs, nil 2839 } 2840 2841 // CancelAction removes a pending Action from the queue for this 2842 // ActionReceiver and marks it as cancelled. 2843 func (u *Unit) CancelAction(action Action) (Action, error) { 2844 return action.Finish(ActionResults{Status: ActionCancelled}) 2845 } 2846 2847 // WatchActionNotifications starts and returns a StringsWatcher that 2848 // notifies when actions with Id prefixes matching this Unit are added 2849 func (u *Unit) WatchActionNotifications() StringsWatcher { 2850 return u.st.watchActionNotificationsFilteredBy(u) 2851 } 2852 2853 // WatchPendingActionNotifications is part of the ActionReceiver interface. 2854 func (u *Unit) WatchPendingActionNotifications() StringsWatcher { 2855 return u.st.watchEnqueuedActionsFilteredBy(u) 2856 } 2857 2858 // Actions returns a list of actions pending or completed for this unit. 2859 func (u *Unit) Actions() ([]Action, error) { 2860 return u.st.matchingActions(u) 2861 } 2862 2863 // CompletedActions returns a list of actions that have finished for 2864 // this unit. 
2865 func (u *Unit) CompletedActions() ([]Action, error) {
2866 	return u.st.matchingActionsCompleted(u)
2867 }
2868 
2869 // PendingActions returns a list of actions pending for this unit.
2870 func (u *Unit) PendingActions() ([]Action, error) {
2871 	return u.st.matchingActionsPending(u)
2872 }
2873 
2874 // RunningActions returns a list of actions running on this unit.
2875 func (u *Unit) RunningActions() ([]Action, error) {
2876 	return u.st.matchingActionsRunning(u)
2877 }
2878 
2879 // Resolve marks the unit as having had any previous state transition
2880 // problems resolved, and informs the unit that it may attempt to
2881 // reestablish normal workflow. The retryHooks parameter indicates
2882 // whether to attempt to reexecute previous failed hooks or to continue
2883 // as if they had succeeded before.
2884 func (u *Unit) Resolve(retryHooks bool) error {
2885 	// We currently check agent status to see if a unit is
2886 	// in error state. As the new Juju Health work is completed,
2887 	// this will change to checking the unit status.
2888 	statusInfo, err := u.Status()
2889 	if err != nil {
2890 		return err
2891 	}
2892 	if statusInfo.Status != status.Error {
2893 		return errors.Errorf("unit %q is not in an error state", u)
2894 	}
2895 	mode := ResolvedNoHooks
2896 	if retryHooks {
2897 		mode = ResolvedRetryHooks
2898 	}
2899 	return u.SetResolved(mode)
2900 }
2901 
2902 // SetResolved marks the unit as having had any previous state transition
2903 // problems resolved, and informs the unit that it may attempt to
2904 // reestablish normal workflow. The resolved mode parameter indicates
2905 // whether to attempt to reexecute previous failed hooks or to continue
2906 // as if they had succeeded before.
2907 func (u *Unit) SetResolved(mode ResolvedMode) (err error) {
2908 	defer errors.DeferredAnnotatef(&err, "cannot set resolved mode for unit %q", u)
2909 	switch mode {
2910 	case ResolvedRetryHooks, ResolvedNoHooks:
2911 	default:
2912 		return fmt.Errorf("invalid error resolution mode: %q", mode)
2913 	}
2914 	// TODO(fwereade): assert unit has error status.
2915 	resolvedNotSet := bson.D{{"resolved", ResolvedNone}}
2916 	ops := []txn.Op{{
2917 		C:      unitsC,
2918 		Id:     u.doc.DocID,
2919 		Assert: append(notDeadDoc, resolvedNotSet...),
2920 		Update: bson.D{{"$set", bson.D{{"resolved", mode}}}},
2921 	}}
2922 	if err := u.st.db().RunTransaction(ops); err == nil {
2923 		u.doc.Resolved = mode
2924 		return nil
2925 	} else if err != txn.ErrAborted {
2926 		return err
2927 	}
2928 	if ok, err := isNotDead(u.st, unitsC, u.doc.DocID); err != nil {
2929 		return err
2930 	} else if !ok {
2931 		return stateerrors.ErrDead
2932 	}
2933 	// For now, the only remaining assert is that resolved was unset.
2934 	return fmt.Errorf("already resolved")
2935 }
2936 
2937 // ClearResolved removes any resolved setting on the unit.
2938 func (u *Unit) ClearResolved() error {
2939 	ops := []txn.Op{{
2940 		C:      unitsC,
2941 		Id:     u.doc.DocID,
2942 		Assert: txn.DocExists,
2943 		Update: bson.D{{"$set", bson.D{{"resolved", ResolvedNone}}}},
2944 	}}
2945 	err := u.st.db().RunTransaction(ops)
2946 	if err != nil {
2947 		return fmt.Errorf("cannot clear resolved mode for unit %q: %v", u, onAbort(err, errors.NotFoundf("unit")))
2948 	}
2949 	u.doc.Resolved = ResolvedNone
2950 	return nil
2951 }
2952 
2953 // StorageConstraints returns the unit's storage constraints.
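// If the unit's charm URL has not been set yet, the storage constraints
// of its application are returned instead.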
2954 func (u *Unit) StorageConstraints() (map[string]StorageConstraints, error) { 2955 if u.doc.CharmURL == nil { 2956 app, err := u.st.Application(u.doc.Application) 2957 if err != nil { 2958 return nil, errors.Trace(err) 2959 } 2960 return app.StorageConstraints() 2961 } 2962 key := applicationStorageConstraintsKey(u.doc.Application, u.doc.CharmURL) 2963 cons, err := readStorageConstraints(u.st, key) 2964 if errors.IsNotFound(err) { 2965 return nil, nil 2966 } else if err != nil { 2967 return nil, errors.Trace(err) 2968 } 2969 return cons, nil 2970 } 2971 2972 type addUnitOpsArgs struct { 2973 unitDoc *unitDoc 2974 containerDoc *cloudContainerDoc 2975 agentStatusDoc statusDoc 2976 workloadStatusDoc *statusDoc 2977 workloadVersionDoc *statusDoc 2978 meterStatusDoc *meterStatusDoc 2979 } 2980 2981 // addUnitOps returns the operations required to add a unit to the units 2982 // collection, along with all the associated expected other unit entries. This 2983 // method is used by both the *Application.addUnitOpsWithCons method and the 2984 // migration import code. 2985 func addUnitOps(st *State, args addUnitOpsArgs) ([]txn.Op, error) { 2986 name := args.unitDoc.Name 2987 agentGlobalKey := unitAgentGlobalKey(name) 2988 2989 // TODO: consider the constraints op 2990 // TODO: consider storageOps 2991 var prereqOps []txn.Op 2992 if args.containerDoc != nil { 2993 prereqOps = append(prereqOps, txn.Op{ 2994 C: cloudContainersC, 2995 Id: args.containerDoc.Id, 2996 Insert: args.containerDoc, 2997 Assert: txn.DocMissing, 2998 }) 2999 } 3000 prereqOps = append(prereqOps, 3001 createStatusOp(st, agentGlobalKey, args.agentStatusDoc), 3002 createStatusOp(st, unitGlobalKey(name), *args.workloadStatusDoc), 3003 createMeterStatusOp(st, agentGlobalKey, args.meterStatusDoc), 3004 createStatusOp(st, globalWorkloadVersionKey(name), *args.workloadVersionDoc), 3005 ) 3006 3007 // Freshly-created units will not have a charm URL set; migrated 3008 // ones will, and they need to maintain their refcounts. If we 3009 // relax the restrictions on migrating apps mid-upgrade, this 3010 // will need to be more sophisticated, because it might need to 3011 // create the settings doc. 3012 if charmURL := args.unitDoc.CharmURL; charmURL != nil { 3013 appName := args.unitDoc.Application 3014 charmRefOps, err := appCharmIncRefOps(st, appName, charmURL, false) 3015 if err != nil { 3016 return nil, errors.Trace(err) 3017 } 3018 prereqOps = append(prereqOps, charmRefOps...) 3019 } 3020 3021 return append(prereqOps, txn.Op{ 3022 C: unitsC, 3023 Id: name, 3024 Assert: txn.DocMissing, 3025 Insert: args.unitDoc, 3026 }), nil 3027 } 3028 3029 // HistoryGetter allows getting the status history based on some identifying key. 3030 type HistoryGetter struct { 3031 st *State 3032 globalKey string 3033 } 3034 3035 // StatusHistory implements status.StatusHistoryGetter. 3036 func (g *HistoryGetter) StatusHistory(filter status.StatusHistoryFilter) ([]status.StatusInfo, error) { 3037 args := &statusHistoryArgs{ 3038 db: g.st.db(), 3039 globalKey: g.globalKey, 3040 filter: filter, 3041 clock: g.st.clock(), 3042 } 3043 return statusHistory(args) 3044 } 3045 3046 // UpgradeSeriesStatus returns the upgrade status of the unit's assigned machine. 
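// The status is read from the upgrade-series lock document held for that
// machine; a NotFound error is returned if no such lock exists.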
3047 func (u *Unit) UpgradeSeriesStatus() (model.UpgradeSeriesStatus, string, error) {
3048 	mID, err := u.AssignedMachineId()
3049 	if err != nil {
3050 		return "", "", errors.Trace(err)
3051 	}
3052 
3053 	coll, closer := u.st.db().GetCollection(machineUpgradeSeriesLocksC)
3054 	defer closer()
3055 
3056 	var lock upgradeSeriesLockDoc
3057 	err = coll.FindId(mID).One(&lock)
3058 	if err == mgo.ErrNotFound {
3059 		return "", "", errors.NotFoundf("upgrade series lock for machine %q", mID)
3060 	}
3061 	if err != nil {
3062 		return "", "", errors.Trace(err)
3063 	}
3064 
3065 	sts, ok := lock.UnitStatuses[u.Name()]
3066 	if !ok {
3067 		return "", "", errors.NotFoundf("unit %q of machine %q", u.Name(), mID)
3068 	}
3069 
3070 	return sts.Status, lock.ToBase, nil
3071 }
3072 
3073 // SetUpgradeSeriesStatus sets the upgrade status of the unit's assigned machine.
3074 func (u *Unit) SetUpgradeSeriesStatus(status model.UpgradeSeriesStatus, message string) error {
3075 	machine, err := u.machine()
3076 	if err != nil {
3077 		return err
3078 	}
3079 	return machine.SetUpgradeSeriesUnitStatus(u.Name(), status, message)
3080 }
3081 
3082 // assertUnitNotDeadOp returns a txn.Op that asserts the given unit name is
3083 // not dead.
3084 func assertUnitNotDeadOp(st *State, unitName string) txn.Op {
3085 	return txn.Op{
3086 		C:      unitsC,
3087 		Id:     st.docID(unitName),
3088 		Assert: notDeadDoc,
3089 	}
3090 }
3091 
3092 // assertUnitAssignedToMachineOp returns a txn.Op that asserts the given unit
3093 // name is assigned to the specified machine.
3094 func assertUnitAssignedToMachineOp(st *State, unitName, machineID string) txn.Op {
3095 	return txn.Op{
3096 		C:      unitsC,
3097 		Id:     st.docID(unitName),
3098 		Assert: bson.D{{"machineid", machineID}},
3099 	}
3100 }
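
// What follows is not part of the original file: it is a small, illustrative
// sketch of how the assignment helpers above compose. The function name and
// the fallback policy are assumptions made for the example only.
func exampleAssignUnitSketch(st *State, unitName string) error {
	u, err := st.Unit(unitName)
	if err != nil {
		return errors.Trace(err)
	}
	if !u.ShouldBeAssigned() {
		// CAAS units are scheduled by the cloud substrate rather than
		// being assigned to machines by Juju.
		return nil
	}
	// Prefer a clean, empty machine; fall back to provisioning a fresh
	// machine when no eligible machine exists.
	m, err := u.AssignToCleanEmptyMachine()
	if err == nil {
		unitLogger.Infof("assigned %q to machine %q", u.Name(), m.Id())
		return nil
	}
	if errors.Cause(err) != noCleanMachines {
		return errors.Trace(err)
	}
	return u.AssignToNewMachine()
}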