github.com/altoros/juju-vmware@v0.0.0-20150312064031-f19ae857ccca/state/unit.go (about) 1 // Copyright 2012-2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package state 5 6 import ( 7 stderrors "errors" 8 "fmt" 9 "time" 10 11 "github.com/juju/errors" 12 "github.com/juju/loggo" 13 "github.com/juju/names" 14 jujutxn "github.com/juju/txn" 15 "github.com/juju/utils" 16 "gopkg.in/juju/charm.v4" 17 "gopkg.in/mgo.v2" 18 "gopkg.in/mgo.v2/bson" 19 "gopkg.in/mgo.v2/txn" 20 21 "github.com/juju/juju/constraints" 22 "github.com/juju/juju/instance" 23 "github.com/juju/juju/network" 24 "github.com/juju/juju/state/presence" 25 "github.com/juju/juju/tools" 26 "github.com/juju/juju/version" 27 ) 28 29 var unitLogger = loggo.GetLogger("juju.state.unit") 30 31 // AssignmentPolicy controls what machine a unit will be assigned to. 32 type AssignmentPolicy string 33 34 const ( 35 // AssignLocal indicates that all service units should be assigned 36 // to machine 0. 37 AssignLocal AssignmentPolicy = "local" 38 39 // AssignClean indicates that every service unit should be assigned 40 // to a machine which never previously has hosted any units, and that 41 // new machines should be launched if required. 42 AssignClean AssignmentPolicy = "clean" 43 44 // AssignCleanEmpty indicates that every service unit should be assigned 45 // to a machine which never previously has hosted any units, and which is not 46 // currently hosting any containers, and that new machines should be launched if required. 47 AssignCleanEmpty AssignmentPolicy = "clean-empty" 48 49 // AssignNew indicates that every service unit should be assigned to a new 50 // dedicated machine. A new machine will be launched for each new unit. 51 AssignNew AssignmentPolicy = "new" 52 ) 53 54 // ResolvedMode describes the way state transition errors 55 // are resolved. 56 type ResolvedMode string 57 58 // These are available ResolvedMode values. 
const (
	// ResolvedNone is the empty mode.
	ResolvedNone ResolvedMode = ""
	// ResolvedRetryHooks is the "retry-hooks" mode.
	ResolvedRetryHooks ResolvedMode = "retry-hooks"
	// ResolvedNoHooks is the "no-hooks" mode.
	ResolvedNoHooks ResolvedMode = "no-hooks"
)

// unitDoc represents the internal state of a unit in MongoDB.
// Note the correspondence with UnitInfo in apiserver/params.
type unitDoc struct {
	DocID            string `bson:"_id"`
	Name             string `bson:"name"`
	EnvUUID          string `bson:"env-uuid"`
	Service          string
	Series           string
	CharmURL         *charm.URL
	Principal        string
	Subordinates     []string
	StorageInstances []string `bson:"storageinstances,omitempty"`
	MachineId        string
	Resolved         ResolvedMode
	Tools            *tools.Tools `bson:",omitempty"`
	Life             Life
	TxnRevno         int64 `bson:"txn-revno"`
	PasswordHash     string

	// No longer used - to be removed.
	Ports          []network.Port
	PublicAddress  string
	PrivateAddress string
}

// Unit represents the state of a service unit.
type Unit struct {
	st  *State
	doc unitDoc
	presence.Presencer
}

// newUnit returns a Unit bound to st that holds a value copy of *udoc.
func newUnit(st *State, udoc *unitDoc) *Unit {
	unit := &Unit{
		st:  st,
		doc: *udoc,
	}
	return unit
}

// Service returns the service, looked up in state by the service name
// recorded in the unit document.
func (u *Unit) Service() (*Service, error) {
	return u.st.Service(u.doc.Service)
}

// ConfigSettings returns the complete set of service charm config settings
// available to the unit. Unset values will be replaced with the default
// value for the associated option, and may thus be nil when no default is
// specified.
114 func (u *Unit) ConfigSettings() (charm.Settings, error) { 115 if u.doc.CharmURL == nil { 116 return nil, fmt.Errorf("unit charm not set") 117 } 118 settings, err := readSettings(u.st, serviceSettingsKey(u.doc.Service, u.doc.CharmURL)) 119 if err != nil { 120 return nil, err 121 } 122 chrm, err := u.st.Charm(u.doc.CharmURL) 123 if err != nil { 124 return nil, err 125 } 126 result := chrm.Config().DefaultSettings() 127 for name, value := range settings.Map() { 128 result[name] = value 129 } 130 return result, nil 131 } 132 133 // ServiceName returns the service name. 134 func (u *Unit) ServiceName() string { 135 return u.doc.Service 136 } 137 138 // Series returns the deployed charm's series. 139 func (u *Unit) Series() string { 140 return u.doc.Series 141 } 142 143 // String returns the unit as string. 144 func (u *Unit) String() string { 145 return u.doc.Name 146 } 147 148 // Name returns the unit name. 149 func (u *Unit) Name() string { 150 return u.doc.Name 151 } 152 153 // unitGlobalKey returns the global database key for the named unit. 154 func unitGlobalKey(name string) string { 155 return "u#" + name 156 } 157 158 // globalKey returns the global database key for the unit. 159 func (u *Unit) globalKey() string { 160 return unitGlobalKey(u.doc.Name) 161 } 162 163 // Life returns whether the unit is Alive, Dying or Dead. 164 func (u *Unit) Life() Life { 165 return u.doc.Life 166 } 167 168 // AgentTools returns the tools that the agent is currently running. 169 // It an error that satisfies errors.IsNotFound if the tools have not 170 // yet been set. 171 func (u *Unit) AgentTools() (*tools.Tools, error) { 172 if u.doc.Tools == nil { 173 return nil, errors.NotFoundf("agent tools for unit %q", u) 174 } 175 tools := *u.doc.Tools 176 return &tools, nil 177 } 178 179 // SetAgentVersion sets the version of juju that the agent is 180 // currently running. 
181 func (u *Unit) SetAgentVersion(v version.Binary) (err error) { 182 defer errors.DeferredAnnotatef(&err, "cannot set agent version for unit %q", u) 183 if err = checkVersionValidity(v); err != nil { 184 return err 185 } 186 tools := &tools.Tools{Version: v} 187 ops := []txn.Op{{ 188 C: unitsC, 189 Id: u.doc.DocID, 190 Assert: notDeadDoc, 191 Update: bson.D{{"$set", bson.D{{"tools", tools}}}}, 192 }} 193 if err := u.st.runTransaction(ops); err != nil { 194 return onAbort(err, ErrDead) 195 } 196 u.doc.Tools = tools 197 return nil 198 } 199 200 // SetPassword sets the password for the machine's agent. 201 func (u *Unit) SetPassword(password string) error { 202 if len(password) < utils.MinAgentPasswordLength { 203 return fmt.Errorf("password is only %d bytes long, and is not a valid Agent password", len(password)) 204 } 205 return u.setPasswordHash(utils.AgentPasswordHash(password)) 206 } 207 208 // setPasswordHash sets the underlying password hash in the database directly 209 // to the value supplied. This is split out from SetPassword to allow direct 210 // manipulation in tests (to check for backwards compatibility). 211 func (u *Unit) setPasswordHash(passwordHash string) error { 212 ops := []txn.Op{{ 213 C: unitsC, 214 Id: u.doc.DocID, 215 Assert: notDeadDoc, 216 Update: bson.D{{"$set", bson.D{{"passwordhash", passwordHash}}}}, 217 }} 218 err := u.st.runTransaction(ops) 219 if err != nil { 220 return fmt.Errorf("cannot set password of unit %q: %v", u, onAbort(err, ErrDead)) 221 } 222 u.doc.PasswordHash = passwordHash 223 return nil 224 } 225 226 // Return the underlying PasswordHash stored in the database. Used by the test 227 // suite to check that the PasswordHash gets properly updated to new values 228 // when compatibility mode is detected. 229 func (u *Unit) getPasswordHash() string { 230 return u.doc.PasswordHash 231 } 232 233 // PasswordValid returns whether the given password is valid 234 // for the given unit. 
func (u *Unit) PasswordValid(password string) bool {
	agentHash := utils.AgentPasswordHash(password)
	if agentHash == u.doc.PasswordHash {
		return true
	}
	// In Juju 1.16 and older we used the slower password hash for unit
	// agents. So check to see if the supplied password matches the old
	// path, and if so, update it to the new mechanism.
	// We ignore any error in setting the password hash, as we'll just try
	// again next time.
	if utils.UserPasswordHash(password, utils.CompatSalt) == u.doc.PasswordHash {
		logger.Debugf("%s logged in with old password hash, changing to AgentPasswordHash",
			u.Tag())
		u.setPasswordHash(agentHash)
		return true
	}
	return false
}

// Destroy, when called on an Alive unit, advances its lifecycle as far as
// possible; it otherwise has no effect. In most situations, the unit's
// life is just set to Dying; but if a principal unit that is not assigned
// to a provisioned machine is Destroyed, it will be removed from state
// directly.
func (u *Unit) Destroy() (err error) {
	defer func() {
		if err == nil {
			// This is a white lie; the document might actually be removed.
			u.doc.Life = Dying
		}
	}()
	// Work on a copy so that refreshes during retries do not disturb the
	// receiver's document.
	unit := &Unit{st: u.st, doc: u.doc}
	buildTxn := func(attempt int) ([]txn.Op, error) {
		if attempt > 0 {
			if err := unit.Refresh(); errors.IsNotFound(err) {
				return nil, jujutxn.ErrNoOperations
			} else if err != nil {
				return nil, err
			}
		}
		switch ops, err := unit.destroyOps(); err {
		case errRefresh:
			// Falls out of the switch to the ErrNoOperations return below.
		case errAlreadyDying:
			return nil, jujutxn.ErrNoOperations
		case nil:
			return ops, nil
		default:
			return nil, err
		}
		return nil, jujutxn.ErrNoOperations
	}
	if err = unit.st.run(buildTxn); err == nil {
		// A unit that can no longer be found after a successful run is
		// treated as success.
		if err = unit.Refresh(); errors.IsNotFound(err) {
			return nil
		}
	}
	return err
}

// unitNotInstalled is an assertion on a status document that holds when
// the status is one of StatusPending, StatusAllocating or StatusInstalling.
var unitNotInstalled = bson.D{
	{"$or", []bson.D{
		{{"status", StatusPending}},
		{{"status", StatusAllocating}},
		{{"status", StatusInstalling}},
	}}}

// destroyOps returns the operations required to destroy the unit. If it
// returns errRefresh, the unit should be refreshed and the destruction
// operations recalculated.
func (u *Unit) destroyOps() ([]txn.Op, error) {
	if u.doc.Life != Alive {
		return nil, errAlreadyDying
	}

	// Where possible, we'd like to be able to short-circuit unit destruction
	// such that units can be removed directly rather than waiting for their
	// agents to start, observe Dying, set Dead, and shut down; this takes a
	// long time and is vexing to users. This turns out to be possible if and
	// only if the unit agent has not yet set its status; this implies that the
	// most the unit could possibly have done is to run its install hook.
	//
	// There's no harm in removing a unit that's run its install hook only --
	// or, at least, there is no more harm than there is in removing a unit
	// that's run its stop hook, and that's the usual condition.
	//
	// Principals with subordinates are never eligible for this shortcut,
	// because the unit agent must inevitably have set a status before getting
	// to the point where it can actually create its subordinate.
	//
	// Subordinates should be eligible for the shortcut but are not currently
	// considered, on the basis that (1) they were created by active principals
	// and can be expected to be deployed pretty soon afterwards, so we don't
	// lose much time and (2) by maintaining this restriction, I can reduce
	// the number of tests that have to change and defer that improvement to
	// its own CL.
	minUnitsOp := minUnitsTriggerOp(u.st, u.ServiceName())
	cleanupOp := u.st.newCleanupOp(cleanupDyingUnit, u.doc.Name)
	setDyingOps := []txn.Op{{
		C:      unitsC,
		Id:     u.doc.DocID,
		Assert: isAliveDoc,
		Update: bson.D{{"$set", bson.D{{"life", Dying}}}},
	}, cleanupOp, minUnitsOp}
	// Subordinate units, and principals that own subordinates or storage
	// instances, only ever get set to Dying here.
	if u.doc.Principal != "" {
		return setDyingOps, nil
	} else if len(u.doc.Subordinates)+len(u.doc.StorageInstances) != 0 {
		return setDyingOps, nil
	}

	sdocId := u.globalKey()
	sdoc, err := getStatus(u.st, sdocId)
	if errors.IsNotFound(err) {
		return nil, errAlreadyDying
	} else if err != nil {
		return nil, err
	}
	if sdoc.Status != StatusPending && sdoc.Status != StatusAllocating && sdoc.Status != StatusInstalling {
		return setDyingOps, nil
	}
	// The status still matches unitNotInstalled: assert that it stays so
	// and remove the unit directly.
	ops := []txn.Op{{
		C:      statusesC,
		Id:     u.st.docID(sdocId),
		Assert: unitNotInstalled,
	}, minUnitsOp}
	removeAsserts := append(isAliveDoc, bson.DocElem{
		"$and", []bson.D{
			unitHasNoSubordinates,
			unitHasNoStorageInstances,
		},
	})
	removeOps, err := u.removeOps(removeAsserts)
	if err == errAlreadyRemoved {
		return nil, errAlreadyDying
	} else if err != nil {
		return nil, err
	}
	return append(ops, removeOps...), nil
}

// destroyHostOps returns all necessary operations to destroy the service
// unit's host machine, or ensure that the conditions preventing its
// destruction remain stable through the transaction.
func (u *Unit) destroyHostOps(s *Service) (ops []txn.Op, err error) {
	if s.doc.Subordinate {
		// For a subordinate, only drop the unit from its principal's
		// subordinates list.
		return []txn.Op{{
			C:      unitsC,
			Id:     u.st.docID(u.doc.Principal),
			Assert: txn.DocExists,
			Update: bson.D{{"$pull", bson.D{{"subordinates", u.doc.Name}}}},
		}}, nil
	} else if u.doc.MachineId == "" {
		unitLogger.Errorf("unit %v unassigned", u)
		return nil, nil
	}

	machineUpdate := bson.D{{"$pull", bson.D{{"principals", u.doc.Name}}}}

	m, err := u.st.Machine(u.doc.MachineId)
	if err != nil {
		if errors.IsNotFound(err) {
			return nil, nil
		}
		return nil, err
	}

	containerCheck := true // whether container conditions allow destroying the host machine
	containers, err := m.Containers()
	if err != nil {
		return nil, err
	}
	if len(containers) > 0 {
		ops = append(ops, txn.Op{
			C:      containerRefsC,
			Id:     m.doc.DocID,
			Assert: bson.D{{"children.0", bson.D{{"$exists", 1}}}},
		})
		containerCheck = false
	} else {
		ops = append(ops, txn.Op{
			C:  containerRefsC,
			Id: m.doc.DocID,
			Assert: bson.D{{"$or", []bson.D{
				{{"children", bson.D{{"$size", 0}}}},
				{{"children", bson.D{{"$exists", false}}}},
			}}},
		})
	}

	machineCheck := true // whether host machine conditions allow destroy
	if len(m.doc.Principals) != 1 || m.doc.Principals[0] != u.doc.Name {
		machineCheck = false
	} else if hasJob(m.doc.Jobs, JobManageEnviron) {
		// Check that the machine does not have any responsibilities that
		// prevent a lifecycle change.
		machineCheck = false
	} else if m.doc.HasVote {
		machineCheck = false
	}

	// assert that the machine conditions pertaining to host removal conditions
	// remain the same throughout the transaction.
	var machineAssert bson.D
	if machineCheck {
		machineAssert = bson.D{{"$and", []bson.D{
			{{"principals", []string{u.doc.Name}}},
			{{"jobs", bson.D{{"$nin", []MachineJob{JobManageEnviron}}}}},
			{{"hasvote", bson.D{{"$ne", true}}}},
		}}}
	} else {
		machineAssert = bson.D{{"$or", []bson.D{
			{{"principals", bson.D{{"$ne", []string{u.doc.Name}}}}},
			{{"jobs", bson.D{{"$in", []MachineJob{JobManageEnviron}}}}},
			{{"hasvote", true}},
		}}}
	}

	// If removal conditions satisfied by machine & container docs, we can
	// destroy it, in addition to removing the unit principal.
	if machineCheck && containerCheck {
		machineUpdate = append(machineUpdate, bson.D{{"$set", bson.D{{"life", Dying}}}}...)
	}

	ops = append(ops, txn.Op{
		C:      machinesC,
		Id:     m.doc.DocID,
		Assert: machineAssert,
		Update: machineUpdate,
	})
	return ops, nil
}

// errAlreadyRemoved is returned by removeOps when the unit's service can
// no longer be found.
var errAlreadyRemoved = stderrors.New("entity has already been removed")

// removeOps returns the operations necessary to remove the unit, assuming
// the supplied asserts apply to the unit document.
func (u *Unit) removeOps(asserts bson.D) ([]txn.Op, error) {
	svc, err := u.st.Service(u.doc.Service)
	if errors.IsNotFound(err) {
		// If the service has been removed, the unit must already have been.
		return nil, errAlreadyRemoved
	} else if err != nil {
		return nil, err
	}
	return svc.removeUnitOps(u, asserts)
}

// ErrUnitHasSubordinates is a standard error to indicate that a Unit
// cannot complete an operation to end its life because it still has
// subordinate services.
var ErrUnitHasSubordinates = stderrors.New("unit has subordinates")

// unitHasNoSubordinates is an assertion on a unit document that holds when
// the subordinates field is empty or missing.
var unitHasNoSubordinates = bson.D{{
	"$or", []bson.D{
		{{"subordinates", bson.D{{"$size", 0}}}},
		{{"subordinates", bson.D{{"$exists", false}}}},
	},
}}

// ErrUnitHasStorageInstances is a standard error to indicate that a Unit
// cannot complete an operation to end its life because it still has
// storage instances.
var ErrUnitHasStorageInstances = stderrors.New("unit has storage instances")

// unitHasNoStorageInstances is an assertion on a unit document that holds
// when the storageinstances field is empty or missing.
var unitHasNoStorageInstances = bson.D{{
	"$or", []bson.D{
		{{"storageinstances", bson.D{{"$size", 0}}}},
		{{"storageinstances", bson.D{{"$exists", false}}}},
	},
}}

// EnsureDead sets the unit lifecycle to Dead if it is Alive or Dying.
// It does nothing otherwise. If the unit has subordinates, it will
// return ErrUnitHasSubordinates; otherwise, if it has storage instances,
// it will return ErrUnitHasStorageInstances.
func (u *Unit) EnsureDead() (err error) {
	if u.doc.Life == Dead {
		return nil
	}
	defer func() {
		if err == nil {
			u.doc.Life = Dead
		}
	}()
	assert := append(notDeadDoc, bson.DocElem{
		"$and", []bson.D{
			unitHasNoSubordinates,
			unitHasNoStorageInstances,
		},
	})
	ops := []txn.Op{{
		C:      unitsC,
		Id:     u.doc.DocID,
		Assert: assert,
		Update: bson.D{{"$set", bson.D{{"life", Dead}}}},
	}}
	// Anything other than an aborted transaction (including success, where
	// err is nil) is returned as-is.
	if err := u.st.runTransaction(ops); err != txn.ErrAborted {
		return err
	}
	// The transaction aborted: work out which assertion failed.
	if notDead, err := isNotDead(u.st, unitsC, u.doc.DocID); err != nil {
		return err
	} else if !notDead {
		return nil
	}
	if err := u.Refresh(); errors.IsNotFound(err) {
		return nil
	} else if err != nil {
		return err
	}
	if len(u.doc.Subordinates) > 0 {
		return ErrUnitHasSubordinates
	}
	// NOTE(review): this is returned whenever the refreshed document has no
	// subordinates; StorageInstances is not inspected here.
	return ErrUnitHasStorageInstances
}

// Remove removes the unit from state, and may remove its service as well, if
// the service is Dying and no other references to it exist. It will fail if
// the unit is not Dead.
func (u *Unit) Remove() (err error) {
	defer errors.DeferredAnnotatef(&err, "cannot remove unit %q", u)
	if u.doc.Life != Dead {
		return stderrors.New("unit is not dead")
	}

	// Now the unit is Dead, we can be sure that it's impossible for it to
	// enter relation scopes (once it's Dying, we can be sure of this; but
	// EnsureDead does not require that it already be Dying, so this is the
	// only point at which we can safely backstop lp:1233457 and mitigate
	// the impact of unit agent bugs that leave relation scopes occupied).
	relations, err := serviceRelations(u.st, u.doc.Service)
	if err != nil {
		return err
	}
	for _, rel := range relations {
		ru, err := rel.Unit(u)
		if err != nil {
			return err
		}
		if err := ru.LeaveScope(); err != nil {
			return err
		}
	}

	// Now we're sure we haven't left any scopes occupied by this unit, we
	// can safely remove the document.
	unit := &Unit{st: u.st, doc: u.doc}
	buildTxn := func(attempt int) ([]txn.Op, error) {
		if attempt > 0 {
			if err := unit.Refresh(); errors.IsNotFound(err) {
				return nil, jujutxn.ErrNoOperations
			} else if err != nil {
				return nil, err
			}
		}
		// NOTE(review): removeOps reports a missing service as
		// errAlreadyRemoved, which is not matched below and so is returned
		// through the default case.
		switch ops, err := unit.removeOps(isDeadDoc); err {
		case errRefresh:
			// Falls out of the switch to the ErrNoOperations return below.
		case errAlreadyDying:
			return nil, jujutxn.ErrNoOperations
		case nil:
			return ops, nil
		default:
			return nil, err
		}
		return nil, jujutxn.ErrNoOperations
	}
	return unit.st.run(buildTxn)
}

// Resolved returns the resolved mode for the unit.
func (u *Unit) Resolved() ResolvedMode {
	return u.doc.Resolved
}

// IsPrincipal returns whether the unit is deployed in its own container,
// and can therefore have subordinate services deployed alongside it.
// A unit is a principal when its document records no principal.
func (u *Unit) IsPrincipal() bool {
	return u.doc.Principal == ""
}

// SubordinateNames returns a copy of the names of any subordinate units.
func (u *Unit) SubordinateNames() []string {
	names := make([]string, len(u.doc.Subordinates))
	copy(names, u.doc.Subordinates)
	return names
}

// StorageInstanceIds returns the IDs of any storage instances owned by
// the unit.
621 func (u *Unit) StorageInstanceIds() []string { 622 ids := make([]string, len(u.doc.StorageInstances)) 623 copy(ids, u.doc.StorageInstances) 624 return ids 625 } 626 627 // RelationsJoined returns the relations for which the unit has entered scope 628 // and neither left it nor prepared to leave it 629 func (u *Unit) RelationsJoined() ([]*Relation, error) { 630 return u.relations(func(ru *RelationUnit) (bool, error) { 631 return ru.Joined() 632 }) 633 } 634 635 // RelationsInScope returns the relations for which the unit has entered scope 636 // and not left it. 637 func (u *Unit) RelationsInScope() ([]*Relation, error) { 638 return u.relations(func(ru *RelationUnit) (bool, error) { 639 return ru.InScope() 640 }) 641 } 642 643 type relationPredicate func(ru *RelationUnit) (bool, error) 644 645 // relations implements RelationsJoined and RelationsInScope. 646 func (u *Unit) relations(predicate relationPredicate) ([]*Relation, error) { 647 candidates, err := serviceRelations(u.st, u.doc.Service) 648 if err != nil { 649 return nil, err 650 } 651 var filtered []*Relation 652 for _, relation := range candidates { 653 relationUnit, err := relation.Unit(u) 654 if err != nil { 655 return nil, err 656 } 657 if include, err := predicate(relationUnit); err != nil { 658 return nil, err 659 } else if include { 660 filtered = append(filtered, relation) 661 } 662 } 663 return filtered, nil 664 } 665 666 // DeployerTag returns the tag of the agent responsible for deploying 667 // the unit. If no such entity can be determined, false is returned. 668 func (u *Unit) DeployerTag() (names.Tag, bool) { 669 if u.doc.Principal != "" { 670 return names.NewUnitTag(u.doc.Principal), true 671 } else if u.doc.MachineId != "" { 672 return names.NewMachineTag(u.doc.MachineId), true 673 } 674 return nil, false 675 } 676 677 // PrincipalName returns the name of the unit's principal. 678 // If the unit is not a subordinate, false is returned. 
679 func (u *Unit) PrincipalName() (string, bool) { 680 return u.doc.Principal, u.doc.Principal != "" 681 } 682 683 // machine returns the unit's machine. 684 func (u *Unit) machine() (*Machine, error) { 685 id, err := u.AssignedMachineId() 686 if err != nil { 687 return nil, errors.Annotatef(err, "unit %v cannot get assigned machine", u) 688 } 689 m, err := u.st.Machine(id) 690 if err != nil { 691 return nil, errors.Annotatef(err, "unit %v misses machine id %v", u) 692 } 693 return m, nil 694 } 695 696 // addressesOfMachine returns Addresses of the related machine if present. 697 func (u *Unit) addressesOfMachine() []network.Address { 698 m, err := u.machine() 699 if err != nil { 700 unitLogger.Errorf("%v", err) 701 return nil 702 } 703 return m.Addresses() 704 } 705 706 // PublicAddress returns the public address of the unit and whether it is valid. 707 func (u *Unit) PublicAddress() (string, bool) { 708 var publicAddress string 709 addresses := u.addressesOfMachine() 710 if len(addresses) > 0 { 711 publicAddress = network.SelectPublicAddress(addresses) 712 } 713 return publicAddress, publicAddress != "" 714 } 715 716 // PrivateAddress returns the private address of the unit and whether it is valid. 717 func (u *Unit) PrivateAddress() (string, bool) { 718 var privateAddress string 719 addresses := u.addressesOfMachine() 720 if len(addresses) > 0 { 721 privateAddress = network.SelectInternalAddress(addresses, false) 722 } 723 return privateAddress, privateAddress != "" 724 } 725 726 // AvailabilityZone returns the name of the availability zone into which 727 // the unit's machine instance was provisioned. 728 func (u *Unit) AvailabilityZone() (string, error) { 729 m, err := u.machine() 730 if err != nil { 731 return "", errors.Trace(err) 732 } 733 return m.AvailabilityZone() 734 } 735 736 // Refresh refreshes the contents of the Unit from the underlying 737 // state. It an error that satisfies errors.IsNotFound if the unit has 738 // been removed. 
739 func (u *Unit) Refresh() error { 740 units, closer := u.st.getCollection(unitsC) 741 defer closer() 742 743 err := units.FindId(u.doc.DocID).One(&u.doc) 744 if err == mgo.ErrNotFound { 745 return errors.NotFoundf("unit %q", u) 746 } 747 if err != nil { 748 return fmt.Errorf("cannot refresh unit %q: %v", u, err) 749 } 750 return nil 751 } 752 753 // Status returns the status of the unit. 754 func (u *Unit) Status() (status Status, info string, data map[string]interface{}, err error) { 755 doc, err := getStatus(u.st, u.globalKey()) 756 if err != nil { 757 return "", "", nil, err 758 } 759 status = doc.Status 760 info = doc.StatusInfo 761 data = doc.StatusData 762 return 763 } 764 765 // SetStatus sets the status of the unit agent. The optional values 766 // allow to pass additional helpful status data. 767 func (u *Unit) SetStatus(status Status, info string, data map[string]interface{}) error { 768 doc, err := newUnitAgentStatusDoc(status, info, data) 769 if err != nil { 770 return err 771 } 772 ops := []txn.Op{{ 773 C: unitsC, 774 Id: u.doc.DocID, 775 Assert: notDeadDoc, 776 }, 777 updateStatusOp(u.st, u.globalKey(), doc.statusDoc), 778 } 779 err = u.st.runTransaction(ops) 780 if err != nil { 781 return fmt.Errorf("cannot set status of unit %q: %v", u, onAbort(err, ErrDead)) 782 } 783 return nil 784 } 785 786 // OpenPorts opens the given port range and protocol for the unit, if 787 // it does not conflict with another already opened range on the 788 // unit's assigned machine. 
789 func (u *Unit) OpenPorts(protocol string, fromPort, toPort int) (err error) { 790 ports, err := NewPortRange(u.Name(), fromPort, toPort, protocol) 791 if err != nil { 792 return errors.Annotatef(err, "invalid port range %v-%v/%v", fromPort, toPort, protocol) 793 } 794 defer errors.DeferredAnnotatef(&err, "cannot open ports %v for unit %q", ports, u) 795 796 machineId, err := u.AssignedMachineId() 797 if err != nil { 798 return errors.Annotatef(err, "unit %q has no assigned machine", u) 799 } 800 801 // TODO(dimitern) 2014-09-10 bug #1337804: network name is 802 // hard-coded until multiple network support lands 803 machinePorts, err := getOrCreatePorts(u.st, machineId, network.DefaultPublic) 804 if err != nil { 805 return errors.Annotatef(err, "cannot get or create ports for machine %q", machineId) 806 } 807 808 return machinePorts.OpenPorts(ports) 809 } 810 811 // ClosePorts closes the given port range and protocol for the unit. 812 func (u *Unit) ClosePorts(protocol string, fromPort, toPort int) (err error) { 813 ports, err := NewPortRange(u.Name(), fromPort, toPort, protocol) 814 if err != nil { 815 return errors.Annotatef(err, "invalid port range %v-%v/%v", fromPort, toPort, protocol) 816 } 817 defer errors.DeferredAnnotatef(&err, "cannot close ports %v for unit %q", ports, u) 818 819 machineId, err := u.AssignedMachineId() 820 if err != nil { 821 return errors.Annotatef(err, "unit %q has no assigned machine", u) 822 } 823 824 // TODO(dimitern) 2014-09-10 bug #1337804: network name is 825 // hard-coded until multiple network support lands 826 machinePorts, err := getOrCreatePorts(u.st, machineId, network.DefaultPublic) 827 if err != nil { 828 return errors.Annotatef(err, "cannot get or create ports for machine %q", machineId) 829 } 830 831 return machinePorts.ClosePorts(ports) 832 } 833 834 // OpenPort opens the given port and protocol for the unit. 
835 func (u *Unit) OpenPort(protocol string, number int) error { 836 return u.OpenPorts(protocol, number, number) 837 } 838 839 // ClosePort closes the given port and protocol for the unit. 840 func (u *Unit) ClosePort(protocol string, number int) error { 841 return u.ClosePorts(protocol, number, number) 842 } 843 844 // OpenedPorts returns a slice containing the open port ranges of the 845 // unit. 846 func (u *Unit) OpenedPorts() ([]network.PortRange, error) { 847 machineId, err := u.AssignedMachineId() 848 if err != nil { 849 return nil, errors.Annotatef(err, "unit %q has no assigned machine", u) 850 } 851 852 // TODO(dimitern) 2014-09-10 bug #1337804: network name is 853 // hard-coded until multiple network support lands 854 machinePorts, err := getPorts(u.st, machineId, network.DefaultPublic) 855 result := []network.PortRange{} 856 if err == nil { 857 ports := machinePorts.PortsForUnit(u.Name()) 858 for _, port := range ports { 859 result = append(result, network.PortRange{ 860 Protocol: port.Protocol, 861 FromPort: port.FromPort, 862 ToPort: port.ToPort, 863 }) 864 } 865 } else { 866 if !errors.IsNotFound(err) { 867 return nil, errors.Annotatef(err, "failed getting ports for unit %q", u) 868 } 869 } 870 network.SortPortRanges(result) 871 return result, nil 872 } 873 874 // CharmURL returns the charm URL this unit is currently using. 875 func (u *Unit) CharmURL() (*charm.URL, bool) { 876 if u.doc.CharmURL == nil { 877 return nil, false 878 } 879 return u.doc.CharmURL, true 880 } 881 882 // SetCharmURL marks the unit as currently using the supplied charm URL. 883 // An error will be returned if the unit is dead, or the charm URL not known. 
884 func (u *Unit) SetCharmURL(curl *charm.URL) error { 885 if curl == nil { 886 return fmt.Errorf("cannot set nil charm url") 887 } 888 889 db, closer := u.st.newDB() 890 defer closer() 891 envUUID := u.st.EnvironUUID() 892 units := getCollectionFromDB(db, unitsC, envUUID) 893 charms := getCollectionFromDB(db, charmsC, envUUID) 894 895 buildTxn := func(attempt int) ([]txn.Op, error) { 896 if attempt > 0 { 897 // NOTE: We're explicitly allowing SetCharmURL to succeed 898 // when the unit is Dying, because service/charm upgrades 899 // should still be allowed to apply to dying units, so 900 // that bugs in departed/broken hooks can be addressed at 901 // runtime. 902 if notDead, err := isNotDeadWithSession(units, u.doc.DocID); err != nil { 903 return nil, errors.Trace(err) 904 } else if !notDead { 905 return nil, ErrDead 906 } 907 } 908 sel := bson.D{{"_id", u.doc.DocID}, {"charmurl", curl}} 909 if count, err := units.Find(sel).Count(); err != nil { 910 return nil, errors.Trace(err) 911 } else if count == 1 { 912 // Already set 913 return nil, jujutxn.ErrNoOperations 914 } 915 if count, err := charms.FindId(curl.String()).Count(); err != nil { 916 return nil, errors.Trace(err) 917 } else if count < 1 { 918 return nil, errors.Errorf("unknown charm url %q", curl) 919 } 920 921 // Add a reference to the service settings for the new charm. 922 incOp, err := settingsIncRefOp(u.st, u.doc.Service, curl, false) 923 if err != nil { 924 return nil, errors.Trace(err) 925 } 926 927 // Set the new charm URL. 928 differentCharm := bson.D{{"charmurl", bson.D{{"$ne", curl}}}} 929 ops := []txn.Op{ 930 incOp, 931 { 932 C: unitsC, 933 Id: u.doc.DocID, 934 Assert: append(notDeadDoc, differentCharm...), 935 Update: bson.D{{"$set", bson.D{{"charmurl", curl}}}}, 936 }} 937 if u.doc.CharmURL != nil { 938 // Drop the reference to the old charm. 
939 decOps, err := settingsDecRefOps(u.st, u.doc.Service, u.doc.CharmURL) 940 if err != nil { 941 return nil, errors.Trace(err) 942 } 943 ops = append(ops, decOps...) 944 } 945 return ops, nil 946 } 947 err := u.st.run(buildTxn) 948 if err == nil { 949 u.doc.CharmURL = curl 950 } 951 return err 952 } 953 954 // AgentPresence returns whether the respective remote agent is alive. 955 func (u *Unit) AgentPresence() (bool, error) { 956 return u.st.pwatcher.Alive(u.globalKey()) 957 } 958 959 // Tag returns a name identifying the unit. 960 // The returned name will be different from other Tag values returned by any 961 // other entities from the same state. 962 func (u *Unit) Tag() names.Tag { 963 return u.UnitTag() 964 } 965 966 // UnitTag returns a names.UnitTag representing this Unit, unless the 967 // unit Name is invalid, in which case it will panic 968 func (u *Unit) UnitTag() names.UnitTag { 969 return names.NewUnitTag(u.Name()) 970 } 971 972 // WaitAgentPresence blocks until the respective agent is alive. 973 func (u *Unit) WaitAgentPresence(timeout time.Duration) (err error) { 974 defer errors.DeferredAnnotatef(&err, "waiting for agent of unit %q", u) 975 ch := make(chan presence.Change) 976 u.st.pwatcher.Watch(u.globalKey(), ch) 977 defer u.st.pwatcher.Unwatch(u.globalKey(), ch) 978 for i := 0; i < 2; i++ { 979 select { 980 case change := <-ch: 981 if change.Alive { 982 return nil 983 } 984 case <-time.After(timeout): 985 return fmt.Errorf("still not alive after timeout") 986 case <-u.st.pwatcher.Dead(): 987 return u.st.pwatcher.Err() 988 } 989 } 990 panic(fmt.Sprintf("presence reported dead status twice in a row for unit %q", u)) 991 } 992 993 // SetAgentPresence signals that the agent for unit u is alive. 994 // It returns the started pinger. 
995 func (u *Unit) SetAgentPresence() (*presence.Pinger, error) { 996 presenceCollection := u.st.getPresence() 997 p := presence.NewPinger(presenceCollection, u.st.EnvironTag(), u.globalKey()) 998 err := p.Start() 999 if err != nil { 1000 return nil, err 1001 } 1002 return p, nil 1003 } 1004 1005 // NotAssignedError indicates that a unit is not assigned to a machine (and, in 1006 // the case of subordinate units, that the unit's principal is not assigned). 1007 type NotAssignedError struct{ Unit *Unit } 1008 1009 func (e *NotAssignedError) Error() string { 1010 return fmt.Sprintf("unit %q is not assigned to a machine", e.Unit) 1011 } 1012 1013 // IsNotAssigned verifies that err is an instance of NotAssignedError 1014 func IsNotAssigned(err error) bool { 1015 _, ok := err.(*NotAssignedError) 1016 return ok 1017 } 1018 1019 // AssignedMachineId returns the id of the assigned machine. 1020 func (u *Unit) AssignedMachineId() (id string, err error) { 1021 if u.IsPrincipal() { 1022 if u.doc.MachineId == "" { 1023 return "", &NotAssignedError{u} 1024 } 1025 return u.doc.MachineId, nil 1026 } 1027 1028 units, closer := u.st.getCollection(unitsC) 1029 defer closer() 1030 1031 pudoc := unitDoc{} 1032 err = units.FindId(u.doc.Principal).One(&pudoc) 1033 if err == mgo.ErrNotFound { 1034 return "", errors.NotFoundf("principal unit %q of %q", u.doc.Principal, u) 1035 } else if err != nil { 1036 return "", err 1037 } 1038 if pudoc.MachineId == "" { 1039 return "", &NotAssignedError{u} 1040 } 1041 return pudoc.MachineId, nil 1042 } 1043 1044 var ( 1045 machineNotAliveErr = stderrors.New("machine is not alive") 1046 machineNotCleanErr = stderrors.New("machine is dirty") 1047 unitNotAliveErr = stderrors.New("unit is not alive") 1048 alreadyAssignedErr = stderrors.New("unit is already assigned to a machine") 1049 inUseErr = stderrors.New("machine is not unused") 1050 ) 1051 1052 // assignToMachine is the internal version of AssignToMachine, 1053 // also used by AssignToUnusedMachine. 
It returns specific errors 1054 // in some cases: 1055 // - machineNotAliveErr when the machine is not alive. 1056 // - unitNotAliveErr when the unit is not alive. 1057 // - alreadyAssignedErr when the unit has already been assigned 1058 // - inUseErr when the machine already has a unit assigned (if unused is true) 1059 func (u *Unit) assignToMachine(m *Machine, unused bool) (err error) { 1060 if u.doc.Series != m.doc.Series { 1061 return fmt.Errorf("series does not match") 1062 } 1063 if u.doc.MachineId != "" { 1064 if u.doc.MachineId != m.Id() { 1065 return alreadyAssignedErr 1066 } 1067 return nil 1068 } 1069 if u.doc.Principal != "" { 1070 return fmt.Errorf("unit is a subordinate") 1071 } 1072 canHost := false 1073 for _, j := range m.doc.Jobs { 1074 if j == JobHostUnits { 1075 canHost = true 1076 break 1077 } 1078 } 1079 if !canHost { 1080 return fmt.Errorf("machine %q cannot host units", m) 1081 } 1082 // assignToMachine implies assignment to an existing machine, 1083 // which is only permitted if unit placement is supported. 1084 if err := u.st.supportsUnitPlacement(); err != nil { 1085 return err 1086 } 1087 assert := append(isAliveDoc, bson.D{ 1088 {"$or", []bson.D{ 1089 {{"machineid", ""}}, 1090 {{"machineid", m.Id()}}, 1091 }}, 1092 }...) 1093 massert := isAliveDoc 1094 if unused { 1095 massert = append(massert, bson.D{{"clean", bson.D{{"$ne", false}}}}...) 
1096 } 1097 ops := []txn.Op{{ 1098 C: unitsC, 1099 Id: u.doc.DocID, 1100 Assert: assert, 1101 Update: bson.D{{"$set", bson.D{{"machineid", m.doc.Id}}}}, 1102 }, { 1103 C: machinesC, 1104 Id: m.doc.DocID, 1105 Assert: massert, 1106 Update: bson.D{{"$addToSet", bson.D{{"principals", u.doc.Name}}}, {"$set", bson.D{{"clean", false}}}}, 1107 }} 1108 err = u.st.runTransaction(ops) 1109 if err == nil { 1110 u.doc.MachineId = m.doc.Id 1111 m.doc.Clean = false 1112 return nil 1113 } 1114 if err != txn.ErrAborted { 1115 return err 1116 } 1117 u0, err := u.st.Unit(u.Name()) 1118 if err != nil { 1119 return err 1120 } 1121 m0, err := u.st.Machine(m.Id()) 1122 if err != nil { 1123 return err 1124 } 1125 switch { 1126 case u0.Life() != Alive: 1127 return unitNotAliveErr 1128 case m0.Life() != Alive: 1129 return machineNotAliveErr 1130 case u0.doc.MachineId != "" || !unused: 1131 return alreadyAssignedErr 1132 } 1133 return inUseErr 1134 } 1135 1136 func assignContextf(err *error, unit *Unit, target string) { 1137 if *err != nil { 1138 *err = fmt.Errorf("cannot assign unit %q to %s: %v", unit, target, *err) 1139 } 1140 } 1141 1142 // AssignToMachine assigns this unit to a given machine. 1143 func (u *Unit) AssignToMachine(m *Machine) (err error) { 1144 defer assignContextf(&err, u, fmt.Sprintf("machine %s", m)) 1145 return u.assignToMachine(m, false) 1146 } 1147 1148 // assignToNewMachine assigns the unit to a machine created according to 1149 // the supplied params, with the supplied constraints. 
1150 func (u *Unit) assignToNewMachine(template MachineTemplate, parentId string, containerType instance.ContainerType) error { 1151 template.principals = []string{u.doc.Name} 1152 template.Dirty = true 1153 1154 var ( 1155 mdoc *machineDoc 1156 ops []txn.Op 1157 err error 1158 ) 1159 switch { 1160 case parentId == "" && containerType == "": 1161 mdoc, ops, err = u.st.addMachineOps(template) 1162 case parentId == "": 1163 if containerType == "" { 1164 return fmt.Errorf("assignToNewMachine called without container type (should never happen)") 1165 } 1166 // The new parent machine is clean and only hosts units, 1167 // regardless of its child. 1168 parentParams := template 1169 parentParams.Jobs = []MachineJob{JobHostUnits} 1170 mdoc, ops, err = u.st.addMachineInsideNewMachineOps(template, parentParams, containerType) 1171 default: 1172 // Container type is specified but no parent id. 1173 mdoc, ops, err = u.st.addMachineInsideMachineOps(template, parentId, containerType) 1174 } 1175 if err != nil { 1176 return err 1177 } 1178 // Ensure the host machine is really clean. 1179 if parentId != "" { 1180 parentDocId := u.st.docID(parentId) 1181 ops = append(ops, txn.Op{ 1182 C: machinesC, 1183 Id: parentDocId, 1184 Assert: bson.D{{"clean", true}}, 1185 }, txn.Op{ 1186 C: containerRefsC, 1187 Id: parentDocId, 1188 Assert: bson.D{hasNoContainersTerm}, 1189 }) 1190 } 1191 isUnassigned := bson.D{{"machineid", ""}} 1192 asserts := append(isAliveDoc, isUnassigned...) 
1193 ops = append(ops, txn.Op{ 1194 C: unitsC, 1195 Id: u.doc.DocID, 1196 Assert: asserts, 1197 Update: bson.D{{"$set", bson.D{{"machineid", mdoc.Id}}}}, 1198 }) 1199 1200 err = u.st.runTransaction(ops) 1201 if err == nil { 1202 u.doc.MachineId = mdoc.Id 1203 return nil 1204 } else if err != txn.ErrAborted { 1205 return err 1206 } 1207 1208 // If we assume that the machine ops will never give us an 1209 // operation that would fail (because the machine id(s) that it 1210 // chooses are unique), then the only reasons that the 1211 // transaction could have been aborted are: 1212 // * the unit is no longer alive 1213 // * the unit has been assigned to a different machine 1214 // * the parent machine we want to create a container on was 1215 // clean but became dirty 1216 unit, err := u.st.Unit(u.Name()) 1217 if err != nil { 1218 return err 1219 } 1220 switch { 1221 case unit.Life() != Alive: 1222 return unitNotAliveErr 1223 case unit.doc.MachineId != "": 1224 return alreadyAssignedErr 1225 } 1226 if parentId == "" { 1227 return fmt.Errorf("cannot add top level machine: transaction aborted for unknown reason") 1228 } 1229 m, err := u.st.Machine(parentId) 1230 if err != nil { 1231 return err 1232 } 1233 if !m.Clean() { 1234 return machineNotCleanErr 1235 } 1236 containers, err := m.Containers() 1237 if err != nil { 1238 return err 1239 } 1240 if len(containers) > 0 { 1241 return machineNotCleanErr 1242 } 1243 return fmt.Errorf("cannot add container within machine: transaction aborted for unknown reason") 1244 } 1245 1246 // Constraints returns the unit's deployment constraints. 1247 func (u *Unit) Constraints() (*constraints.Value, error) { 1248 cons, err := readConstraints(u.st, u.globalKey()) 1249 if errors.IsNotFound(err) { 1250 // Lack of constraints indicates lack of unit. 
1251 return nil, errors.NotFoundf("unit") 1252 } else if err != nil { 1253 return nil, err 1254 } 1255 return &cons, nil 1256 } 1257 1258 // AssignToNewMachineOrContainer assigns the unit to a new machine, 1259 // with constraints determined according to the service and 1260 // environment constraints at the time of unit creation. If a 1261 // container is required, a clean, empty machine instance is required 1262 // on which to create the container. An existing clean, empty instance 1263 // is first searched for, and if not found, a new one is created. 1264 func (u *Unit) AssignToNewMachineOrContainer() (err error) { 1265 defer assignContextf(&err, u, "new machine or container") 1266 if u.doc.Principal != "" { 1267 return fmt.Errorf("unit is a subordinate") 1268 } 1269 cons, err := u.Constraints() 1270 if err != nil { 1271 return err 1272 } 1273 if !cons.HasContainer() { 1274 return u.AssignToNewMachine() 1275 } 1276 1277 // Find a clean, empty machine on which to create a container. 1278 var host machineDoc 1279 hostCons := *cons 1280 noContainer := instance.NONE 1281 hostCons.Container = &noContainer 1282 query, closer, err := u.findCleanMachineQuery(true, &hostCons) 1283 if err != nil { 1284 return err 1285 } 1286 defer closer() 1287 err = query.One(&host) 1288 if err == mgo.ErrNotFound { 1289 // No existing clean, empty machine so create a new one. 1290 // The container constraint will be used by AssignToNewMachine to create the required container. 
1291 return u.AssignToNewMachine() 1292 } else if err != nil { 1293 return err 1294 } 1295 svc, err := u.Service() 1296 if err != nil { 1297 return err 1298 } 1299 requestedNetworks, err := svc.Networks() 1300 if err != nil { 1301 return err 1302 } 1303 template := MachineTemplate{ 1304 Series: u.doc.Series, 1305 Constraints: *cons, 1306 Jobs: []MachineJob{JobHostUnits}, 1307 RequestedNetworks: requestedNetworks, 1308 } 1309 err = u.assignToNewMachine(template, host.Id, *cons.Container) 1310 if err == machineNotCleanErr { 1311 // The clean machine was used before we got a chance to use it so just 1312 // stick the unit on a new machine. 1313 return u.AssignToNewMachine() 1314 } 1315 return err 1316 } 1317 1318 // AssignToNewMachine assigns the unit to a new machine, with constraints 1319 // determined according to the service and environment constraints at the 1320 // time of unit creation. 1321 func (u *Unit) AssignToNewMachine() (err error) { 1322 defer assignContextf(&err, u, "new machine") 1323 if u.doc.Principal != "" { 1324 return fmt.Errorf("unit is a subordinate") 1325 } 1326 // Get the ops necessary to create a new machine, and the machine doc that 1327 // will be added with those operations (which includes the machine id). 1328 cons, err := u.Constraints() 1329 if err != nil { 1330 return err 1331 } 1332 var containerType instance.ContainerType 1333 // Configure to create a new container if required. 1334 if cons.HasContainer() { 1335 containerType = *cons.Container 1336 } 1337 svc, err := u.Service() 1338 if err != nil { 1339 return err 1340 } 1341 requestedNetworks, err := svc.Networks() 1342 if err != nil { 1343 return err 1344 } 1345 storageInstances, err := u.StorageInstances() 1346 if err != nil { 1347 return err 1348 } 1349 var blockDeviceParams []BlockDeviceParams 1350 for _, storageInstance := range storageInstances { 1351 // TODO(axw) consult storage provider to see if we need to request 1352 // a block device for the storage instance. 
1353 storageInstanceParams, _ := storageInstance.Params() 1354 blockDeviceParams = append(blockDeviceParams, BlockDeviceParams{ 1355 storageInstance: storageInstance.Id(), 1356 Size: storageInstanceParams.Size, 1357 }) 1358 } 1359 template := MachineTemplate{ 1360 Series: u.doc.Series, 1361 Constraints: *cons, 1362 Jobs: []MachineJob{JobHostUnits}, 1363 RequestedNetworks: requestedNetworks, 1364 BlockDevices: blockDeviceParams, 1365 } 1366 return u.assignToNewMachine(template, "", containerType) 1367 } 1368 1369 var noCleanMachines = stderrors.New("all eligible machines in use") 1370 1371 // AssignToCleanMachine assigns u to a machine which is marked as clean. A machine 1372 // is clean if it has never had any principal units assigned to it. 1373 // If there are no clean machines besides any machine(s) running JobHostEnviron, 1374 // an error is returned. 1375 // This method does not take constraints into consideration when choosing a 1376 // machine (lp:1161919). 1377 func (u *Unit) AssignToCleanMachine() (m *Machine, err error) { 1378 return u.assignToCleanMaybeEmptyMachine(false) 1379 } 1380 1381 // AssignToCleanEmptyMachine assigns u to a machine which is marked as clean and is also 1382 // not hosting any containers. A machine is clean if it has never had any principal units 1383 // assigned to it. If there are no clean machines besides any machine(s) running JobHostEnviron, 1384 // an error is returned. 1385 // This method does not take constraints into consideration when choosing a 1386 // machine (lp:1161919). 
1387 func (u *Unit) AssignToCleanEmptyMachine() (m *Machine, err error) { 1388 return u.assignToCleanMaybeEmptyMachine(true) 1389 } 1390 1391 var hasContainerTerm = bson.DocElem{ 1392 "$and", []bson.D{ 1393 {{"children", bson.D{{"$not", bson.D{{"$size", 0}}}}}}, 1394 {{"children", bson.D{{"$exists", true}}}}, 1395 }} 1396 1397 var hasNoContainersTerm = bson.DocElem{ 1398 "$or", []bson.D{ 1399 {{"children", bson.D{{"$size", 0}}}}, 1400 {{"children", bson.D{{"$exists", false}}}}, 1401 }} 1402 1403 // findCleanMachineQuery returns a Mongo query to find clean (and possibly empty) machines with 1404 // characteristics matching the specified constraints. 1405 func (u *Unit) findCleanMachineQuery(requireEmpty bool, cons *constraints.Value) (_ *mgo.Query, _ func(), err error) { 1406 db, closer := u.st.newDB() 1407 defer func() { 1408 if err != nil { 1409 closer() 1410 } 1411 }() 1412 containerRefsCollection := db.C(containerRefsC) 1413 1414 // Select all machines that can accept principal units and are clean. 1415 var containerRefs []machineContainers 1416 // If we need empty machines, first build up a list of machine ids which have containers 1417 // so we can exclude those. 1418 if requireEmpty { 1419 err = containerRefsCollection.Find(bson.D{hasContainerTerm}).All(&containerRefs) 1420 if err != nil { 1421 return nil, closer, err 1422 } 1423 } 1424 var machinesWithContainers = make([]string, len(containerRefs)) 1425 for i, cref := range containerRefs { 1426 machinesWithContainers[i] = cref.Id 1427 } 1428 terms := bson.D{ 1429 {"life", Alive}, 1430 {"series", u.doc.Series}, 1431 {"jobs", []MachineJob{JobHostUnits}}, 1432 {"clean", true}, 1433 {"machineid", bson.D{{"$nin", machinesWithContainers}}}, 1434 } 1435 // Add the container filter term if necessary. 
1436 var containerType instance.ContainerType 1437 if cons.Container != nil { 1438 containerType = *cons.Container 1439 } 1440 if containerType == instance.NONE { 1441 terms = append(terms, bson.DocElem{"containertype", ""}) 1442 } else if containerType != "" { 1443 terms = append(terms, bson.DocElem{"containertype", string(containerType)}) 1444 } 1445 1446 // Find the ids of machines which satisfy any required hardware 1447 // constraints. If there is no instanceData for a machine, that 1448 // machine is not considered as suitable for deploying the unit. 1449 // This can happen if the machine is not yet provisioned. It may 1450 // be that when the machine is provisioned it will be found to 1451 // be suitable, but we don't know that right now and it's best 1452 // to err on the side of caution and exclude such machines. 1453 var suitableInstanceData []instanceData 1454 var suitableTerms bson.D 1455 if cons.Arch != nil && *cons.Arch != "" { 1456 suitableTerms = append(suitableTerms, bson.DocElem{"arch", *cons.Arch}) 1457 } 1458 if cons.Mem != nil && *cons.Mem > 0 { 1459 suitableTerms = append(suitableTerms, bson.DocElem{"mem", bson.D{{"$gte", *cons.Mem}}}) 1460 } 1461 if cons.RootDisk != nil && *cons.RootDisk > 0 { 1462 suitableTerms = append(suitableTerms, bson.DocElem{"rootdisk", bson.D{{"$gte", *cons.RootDisk}}}) 1463 } 1464 if cons.CpuCores != nil && *cons.CpuCores > 0 { 1465 suitableTerms = append(suitableTerms, bson.DocElem{"cpucores", bson.D{{"$gte", *cons.CpuCores}}}) 1466 } 1467 if cons.CpuPower != nil && *cons.CpuPower > 0 { 1468 suitableTerms = append(suitableTerms, bson.DocElem{"cpupower", bson.D{{"$gte", *cons.CpuPower}}}) 1469 } 1470 if cons.Tags != nil && len(*cons.Tags) > 0 { 1471 suitableTerms = append(suitableTerms, bson.DocElem{"tags", bson.D{{"$all", *cons.Tags}}}) 1472 } 1473 if len(suitableTerms) > 0 { 1474 instanceData := db.C(instanceDataC) 1475 err := instanceData.Find(suitableTerms).Select(bson.M{"_id": 1}).All(&suitableInstanceData) 1476 
if err != nil { 1477 return nil, closer, err 1478 } 1479 var suitableIds = make([]string, len(suitableInstanceData)) 1480 for i, m := range suitableInstanceData { 1481 suitableIds[i] = m.DocID 1482 } 1483 terms = append(terms, bson.DocElem{"_id", bson.D{{"$in", suitableIds}}}) 1484 } 1485 machines := db.C(machinesC) 1486 return machines.Find(terms), closer, nil 1487 } 1488 1489 // assignToCleanMaybeEmptyMachine implements AssignToCleanMachine and AssignToCleanEmptyMachine. 1490 // A 'machine' may be a machine instance or container depending on the service constraints. 1491 func (u *Unit) assignToCleanMaybeEmptyMachine(requireEmpty bool) (m *Machine, err error) { 1492 context := "clean" 1493 if requireEmpty { 1494 context += ", empty" 1495 } 1496 context += " machine" 1497 1498 if u.doc.Principal != "" { 1499 err = fmt.Errorf("unit is a subordinate") 1500 assignContextf(&err, u, context) 1501 return nil, err 1502 } 1503 1504 // TODO(axw) once we support dynamic storage provisioning, we 1505 // should check whether all of the storage constraints can be 1506 // fulfilled dynamically (by querying a policy). 1507 storageCons, err := u.StorageConstraints() 1508 if err != nil { 1509 assignContextf(&err, u, context) 1510 return nil, err 1511 } 1512 if len(storageCons) > 0 { 1513 return nil, noCleanMachines 1514 } 1515 1516 // Get the unit constraints to see what deployment requirements we have to adhere to. 1517 cons, err := u.Constraints() 1518 if err != nil { 1519 assignContextf(&err, u, context) 1520 return nil, err 1521 } 1522 query, closer, err := u.findCleanMachineQuery(requireEmpty, cons) 1523 if err != nil { 1524 assignContextf(&err, u, context) 1525 return nil, err 1526 } 1527 defer closer() 1528 1529 // Find all of the candidate machines, and associated 1530 // instances for those that are provisioned. Instances 1531 // will be distributed across in preference to 1532 // unprovisioned machines. 
1533 var mdocs []*machineDoc 1534 if err := query.All(&mdocs); err != nil { 1535 assignContextf(&err, u, context) 1536 return nil, err 1537 } 1538 var unprovisioned []*Machine 1539 var instances []instance.Id 1540 instanceMachines := make(map[instance.Id]*Machine) 1541 for _, mdoc := range mdocs { 1542 m := newMachine(u.st, mdoc) 1543 instance, err := m.InstanceId() 1544 if errors.IsNotProvisioned(err) { 1545 unprovisioned = append(unprovisioned, m) 1546 } else if err != nil { 1547 assignContextf(&err, u, context) 1548 return nil, err 1549 } else { 1550 instances = append(instances, instance) 1551 instanceMachines[instance] = m 1552 } 1553 } 1554 1555 // Filter the list of instances that are suitable for 1556 // distribution, and then map them back to machines. 1557 // 1558 // TODO(axw) 2014-05-30 #1324904 1559 // Shuffle machines to reduce likelihood of collisions. 1560 // The partition of provisioned/unprovisioned machines 1561 // must be maintained. 1562 if instances, err = distributeUnit(u, instances); err != nil { 1563 assignContextf(&err, u, context) 1564 return nil, err 1565 } 1566 machines := make([]*Machine, len(instances), len(instances)+len(unprovisioned)) 1567 for i, instance := range instances { 1568 m, ok := instanceMachines[instance] 1569 if !ok { 1570 err := fmt.Errorf("invalid instance returned: %v", instance) 1571 assignContextf(&err, u, context) 1572 return nil, err 1573 } 1574 machines[i] = m 1575 } 1576 machines = append(machines, unprovisioned...) 1577 1578 // TODO(axw) 2014-05-30 #1253704 1579 // We should not select a machine that is in the process 1580 // of being provisioned. There's no point asserting that 1581 // the machine hasn't been provisioned, as there'll still 1582 // be a period of time during which the machine may be 1583 // provisioned without the fact having yet been recorded 1584 // in state. 
1585 for _, m := range machines { 1586 err := u.assignToMachine(m, true) 1587 if err == nil { 1588 return m, nil 1589 } 1590 if err != inUseErr && err != machineNotAliveErr { 1591 assignContextf(&err, u, context) 1592 return nil, err 1593 } 1594 } 1595 return nil, noCleanMachines 1596 } 1597 1598 // UnassignFromMachine removes the assignment between this unit and the 1599 // machine it's assigned to. 1600 func (u *Unit) UnassignFromMachine() (err error) { 1601 // TODO check local machine id and add an assert that the 1602 // machine id is as expected. 1603 ops := []txn.Op{{ 1604 C: unitsC, 1605 Id: u.doc.DocID, 1606 Assert: txn.DocExists, 1607 Update: bson.D{{"$set", bson.D{{"machineid", ""}}}}, 1608 }} 1609 if u.doc.MachineId != "" { 1610 ops = append(ops, txn.Op{ 1611 C: machinesC, 1612 Id: u.st.docID(u.doc.MachineId), 1613 Assert: txn.DocExists, 1614 Update: bson.D{{"$pull", bson.D{{"principals", u.doc.Name}}}}, 1615 }) 1616 } 1617 err = u.st.runTransaction(ops) 1618 if err != nil { 1619 return fmt.Errorf("cannot unassign unit %q from machine: %v", u, onAbort(err, errors.NotFoundf("machine"))) 1620 } 1621 u.doc.MachineId = "" 1622 return nil 1623 } 1624 1625 // ActionSpecsByName is a map of action names to their respective ActionSpec. 1626 type ActionSpecsByName map[string]charm.ActionSpec 1627 1628 // AddAction adds a new Action of type name and using arguments payload to 1629 // this Unit, and returns its ID. 
1630 func (u *Unit) AddAction(name string, payload map[string]interface{}) (*Action, error) { 1631 if len(name) == 0 { 1632 return nil, errors.New("no action name given") 1633 } 1634 specs, err := u.ActionSpecs() 1635 if err != nil { 1636 return nil, err 1637 } 1638 spec, ok := specs[name] 1639 if !ok { 1640 return nil, errors.Errorf("action %q not defined on unit %q", name, u.Name()) 1641 } 1642 err = spec.ValidateParams(payload) 1643 if err != nil { 1644 return nil, err 1645 } 1646 return u.st.EnqueueAction(u.Tag(), name, payload) 1647 } 1648 1649 // ActionSpecs gets the ActionSpec map for the Unit's charm. 1650 func (u *Unit) ActionSpecs() (ActionSpecsByName, error) { 1651 none := ActionSpecsByName{} 1652 curl, _ := u.CharmURL() 1653 if curl == nil { 1654 // If unit charm URL is not yet set, fall back to service 1655 svc, err := u.Service() 1656 if err != nil { 1657 return none, err 1658 } 1659 curl, _ = svc.CharmURL() 1660 if curl == nil { 1661 return none, errors.Errorf("no URL set for service %q", svc.Name()) 1662 } 1663 } 1664 ch, err := u.st.Charm(curl) 1665 if err != nil { 1666 return none, errors.Annotatef(err, "unable to get charm with URL %q", curl.String()) 1667 } 1668 chActions := ch.Actions() 1669 if chActions == nil || len(chActions.ActionSpecs) == 0 { 1670 return none, errors.Errorf("no actions defined on charm %q", ch.String()) 1671 } 1672 return chActions.ActionSpecs, nil 1673 } 1674 1675 // CancelAction removes a pending Action from the queue for this 1676 // ActionReceiver and marks it as cancelled. 
1677 func (u *Unit) CancelAction(action *Action) (*Action, error) { 1678 return action.Finish(ActionResults{Status: ActionCancelled}) 1679 } 1680 1681 // WatchActionNotifications starts and returns a StringsWatcher that 1682 // notifies when actions with Id prefixes matching this Unit are added 1683 func (u *Unit) WatchActionNotifications() StringsWatcher { 1684 return u.st.watchEnqueuedActionsFilteredBy(u) 1685 } 1686 1687 // Actions returns a list of actions pending or completed for this unit. 1688 func (u *Unit) Actions() ([]*Action, error) { 1689 return u.st.matchingActions(u) 1690 } 1691 1692 // CompletedActions returns a list of actions that have finished for 1693 // this unit. 1694 func (u *Unit) CompletedActions() ([]*Action, error) { 1695 return u.st.matchingActionsCompleted(u) 1696 } 1697 1698 // PendingActions returns a list of actions pending for this unit. 1699 func (u *Unit) PendingActions() ([]*Action, error) { 1700 return u.st.matchingActionsPending(u) 1701 } 1702 1703 // RunningActions returns a list of actions running on this unit. 1704 func (u *Unit) RunningActions() ([]*Action, error) { 1705 return u.st.matchingActionsRunning(u) 1706 } 1707 1708 // Resolve marks the unit as having had any previous state transition 1709 // problems resolved, and informs the unit that it may attempt to 1710 // reestablish normal workflow. The retryHooks parameter informs 1711 // whether to attempt to reexecute previous failed hooks or to continue 1712 // as if they had succeeded before. 
1713 func (u *Unit) Resolve(retryHooks bool) error { 1714 status, _, _, err := u.Status() 1715 if err != nil { 1716 return err 1717 } 1718 if status != StatusError { 1719 return errors.Errorf("unit %q is not in an error state", u) 1720 } 1721 mode := ResolvedNoHooks 1722 if retryHooks { 1723 mode = ResolvedRetryHooks 1724 } 1725 return u.SetResolved(mode) 1726 } 1727 1728 // SetResolved marks the unit as having had any previous state transition 1729 // problems resolved, and informs the unit that it may attempt to 1730 // reestablish normal workflow. The resolved mode parameter informs 1731 // whether to attempt to reexecute previous failed hooks or to continue 1732 // as if they had succeeded before. 1733 func (u *Unit) SetResolved(mode ResolvedMode) (err error) { 1734 defer errors.DeferredAnnotatef(&err, "cannot set resolved mode for unit %q", u) 1735 switch mode { 1736 case ResolvedRetryHooks, ResolvedNoHooks: 1737 default: 1738 return fmt.Errorf("invalid error resolution mode: %q", mode) 1739 } 1740 // TODO(fwereade): assert unit has error status. 1741 resolvedNotSet := bson.D{{"resolved", ResolvedNone}} 1742 ops := []txn.Op{{ 1743 C: unitsC, 1744 Id: u.doc.DocID, 1745 Assert: append(notDeadDoc, resolvedNotSet...), 1746 Update: bson.D{{"$set", bson.D{{"resolved", mode}}}}, 1747 }} 1748 if err := u.st.runTransaction(ops); err == nil { 1749 u.doc.Resolved = mode 1750 return nil 1751 } else if err != txn.ErrAborted { 1752 return err 1753 } 1754 if ok, err := isNotDead(u.st, unitsC, u.doc.DocID); err != nil { 1755 return err 1756 } else if !ok { 1757 return ErrDead 1758 } 1759 // For now, the only remaining assert is that resolved was unset. 1760 return fmt.Errorf("already resolved") 1761 } 1762 1763 // ClearResolved removes any resolved setting on the unit. 
1764 func (u *Unit) ClearResolved() error { 1765 ops := []txn.Op{{ 1766 C: unitsC, 1767 Id: u.doc.DocID, 1768 Assert: txn.DocExists, 1769 Update: bson.D{{"$set", bson.D{{"resolved", ResolvedNone}}}}, 1770 }} 1771 err := u.st.runTransaction(ops) 1772 if err != nil { 1773 return fmt.Errorf("cannot clear resolved mode for unit %q: %v", u, errors.NotFoundf("unit")) 1774 } 1775 u.doc.Resolved = ResolvedNone 1776 return nil 1777 } 1778 1779 // AddMetric adds a new batch of metrics to the database. 1780 // A UUID for the metric will be generated and the new MetricBatch will be returned 1781 func (u *Unit) AddMetrics(created time.Time, metrics []Metric) (*MetricBatch, error) { 1782 charmUrl, ok := u.CharmURL() 1783 if !ok { 1784 return nil, stderrors.New("failed to add metrics, couldn't find charm url") 1785 } 1786 service, err := u.Service() 1787 if err != nil { 1788 return nil, errors.Annotatef(err, "couldn't retrieve service whilst adding metrics") 1789 } 1790 return u.st.addMetrics(u.UnitTag(), charmUrl, created, metrics, service.MetricCredentials()) 1791 } 1792 1793 // StorageConstraints returns the unit's storage constraints. 1794 func (u *Unit) StorageConstraints() (map[string]StorageConstraints, error) { 1795 // TODO(axw) eventually we should be able to override service 1796 // storage constraints at the unit level. 1797 return readStorageConstraints(u.st, serviceGlobalKey(u.doc.Service)) 1798 } 1799 1800 // StorageInstances returns the storage instances owned by this unit. 1801 func (u *Unit) StorageInstances() ([]StorageInstance, error) { 1802 return readStorageInstances(u.st, u.Tag()) 1803 }