// Copyright 2012, 2013 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package state

import (
	"fmt"
	"strings"
	"time"

	"labix.org/v2/mgo"
	"labix.org/v2/mgo/txn"

	"launchpad.net/juju-core/constraints"
	"launchpad.net/juju-core/errors"
	"launchpad.net/juju-core/instance"
	"launchpad.net/juju-core/names"
	"launchpad.net/juju-core/state/api/params"
	"launchpad.net/juju-core/state/presence"
	"launchpad.net/juju-core/tools"
	"launchpad.net/juju-core/utils"
	"launchpad.net/juju-core/version"
)

// Machine represents the state of a machine.
type Machine struct {
	st  *State     // connection to state; used for all database operations
	doc machineDoc // cached copy of the machine's MongoDB document
	annotator      // embedded annotation support (Get/SetAnnotations)
}

// MachineJob values define responsibilities that machines may be
// expected to fulfil.
type MachineJob int

const (
	// The zero value is deliberately skipped so that it is never a
	// valid job.
	_ MachineJob = iota
	JobHostUnits
	JobManageEnviron

	// Deprecated in 1.18.
	JobManageStateDeprecated
)

// jobNames maps internal machine jobs to their API wire representation.
var jobNames = map[MachineJob]params.MachineJob{
	JobHostUnits:     params.JobHostUnits,
	JobManageEnviron: params.JobManageEnviron,

	// Deprecated in 1.18.
	JobManageStateDeprecated: params.JobManageStateDeprecated,
}

// AllJobs returns all supported machine jobs.
// The deprecated JobManageStateDeprecated is intentionally excluded.
func AllJobs() []MachineJob {
	return []MachineJob{JobHostUnits, JobManageEnviron}
}

// ToParams returns the job as params.MachineJob.
// Unknown values are rendered as a descriptive placeholder rather than
// an error, so they remain visible in API output.
func (job MachineJob) ToParams() params.MachineJob {
	if paramsJob, ok := jobNames[job]; ok {
		return paramsJob
	}
	return params.MachineJob(fmt.Sprintf("<unknown job %d>", int(job)))
}

// MachineJobFromParams returns the job corresponding to params.MachineJob.
func MachineJobFromParams(job params.MachineJob) (MachineJob, error) {
	// Reverse lookup through jobNames; the map is tiny, so a linear
	// scan is fine.
	for machineJob, paramJob := range jobNames {
		if paramJob == job {
			return machineJob, nil
		}
	}
	return -1, fmt.Errorf("invalid machine job %q", job)
}

// String implements fmt.Stringer using the job's API name.
func (job MachineJob) String() string {
	return string(job.ToParams())
}

// machineDoc represents the internal state of a machine in MongoDB.
// Note the correspondence with MachineInfo in state/api/params.
type machineDoc struct {
	Id            string `bson:"_id"`
	Nonce         string
	Series        string
	ContainerType string
	Principals    []string
	Life          Life
	Tools         *tools.Tools `bson:",omitempty"`
	Jobs          []MachineJob
	NoVote        bool
	HasVote       bool
	PasswordHash  string
	Clean         bool
	// We store 2 different sets of addresses for the machine, obtained
	// from different sources.
	// Addresses is the set of addresses obtained by asking the provider.
	Addresses []address
	// MachineAddresses is the set of addresses obtained from the machine itself.
	MachineAddresses []address
	// The SupportedContainers attributes are used to advertise what containers this
	// machine is capable of hosting.
	SupportedContainersKnown bool
	SupportedContainers      []instance.ContainerType `bson:",omitempty"`
	// Deprecated. InstanceId, now lives on instanceData.
	// This attribute is retained so that data from existing machines can be read.
	// SCHEMACHANGE
	// TODO(wallyworld): remove this attribute when schema upgrades are possible.
	InstanceId instance.Id
}

// newMachine builds a *Machine from its database document, wiring the
// embedded annotator up with the machine's global key and tag.
func newMachine(st *State, doc *machineDoc) *Machine {
	machine := &Machine{
		st:  st,
		doc: *doc,
	}
	machine.annotator = annotator{
		globalKey: machine.globalKey(),
		tag:       machine.Tag(),
		st:        st,
	}
	return machine
}

// Id returns the machine id.
func (m *Machine) Id() string {
	return m.doc.Id
}

// Series returns the operating system series running on the machine.
func (m *Machine) Series() string {
	return m.doc.Series
}

// ContainerType returns the type of container hosting this machine.
func (m *Machine) ContainerType() instance.ContainerType {
	return instance.ContainerType(m.doc.ContainerType)
}

// machineGlobalKey returns the global database key for the identified machine.
func machineGlobalKey(id string) string {
	return "m#" + id
}

// globalKey returns the global database key for the machine.
func (m *Machine) globalKey() string {
	return machineGlobalKey(m.doc.Id)
}

// instanceData holds attributes relevant to a provisioned machine.
// It is stored in its own collection, keyed by the same id as the
// machine document. Pointer fields are nil when the corresponding
// characteristic was not reported by the provider.
type instanceData struct {
	Id         string      `bson:"_id"`
	InstanceId instance.Id `bson:"instanceid"`
	Status     string      `bson:"status,omitempty"`
	Arch       *string     `bson:"arch,omitempty"`
	Mem        *uint64     `bson:"mem,omitempty"`
	RootDisk   *uint64     `bson:"rootdisk,omitempty"`
	CpuCores   *uint64     `bson:"cpucores,omitempty"`
	CpuPower   *uint64     `bson:"cpupower,omitempty"`
	Tags       *[]string   `bson:"tags,omitempty"`
}

// TODO(wallyworld): move this method to a service.
// HardwareCharacteristics returns the hardware characteristics recorded
// for this machine when it was provisioned. It returns a not-found
// error if no instance data has been recorded for the machine.
func (m *Machine) HardwareCharacteristics() (*instance.HardwareCharacteristics, error) {
	hc := &instance.HardwareCharacteristics{}
	instData, err := getInstanceData(m.st, m.Id())
	if err != nil {
		return nil, err
	}
	hc.Arch = instData.Arch
	hc.Mem = instData.Mem
	hc.RootDisk = instData.RootDisk
	hc.CpuCores = instData.CpuCores
	hc.CpuPower = instData.CpuPower
	hc.Tags = instData.Tags
	return hc, nil
}

// getInstanceData fetches the instanceData document for the given
// machine id, translating mgo.ErrNotFound into an errors.NotFoundf
// error that satisfies errors.IsNotFoundError.
func getInstanceData(st *State, id string) (instanceData, error) {
	var instData instanceData
	err := st.instanceData.FindId(id).One(&instData)
	if err == mgo.ErrNotFound {
		return instanceData{}, errors.NotFoundf("instance data for machine %v", id)
	}
	if err != nil {
		return instanceData{}, fmt.Errorf("cannot get instance data for machine %v: %v", id, err)
	}
	return instData, nil
}

// Tag returns a name identifying the machine that is safe to use
// as a file name. The returned name will be different from other
// Tag values returned by any other entities from the same state.
func (m *Machine) Tag() string {
	return names.MachineTag(m.Id())
}

// Life returns whether the machine is Alive, Dying or Dead.
func (m *Machine) Life() Life {
	return m.doc.Life
}

// Jobs returns the responsibilities that must be fulfilled by m's agent.
func (m *Machine) Jobs() []MachineJob {
	return m.doc.Jobs
}

// WantsVote reports whether the machine is a state server
// that wants to take part in peer voting.
func (m *Machine) WantsVote() bool {
	return hasJob(m.doc.Jobs, JobManageEnviron) && !m.doc.NoVote
}

// HasVote reports whether that machine is currently a voting
// member of the replica set.
func (m *Machine) HasVote() bool {
	return m.doc.HasVote
}

// SetHasVote sets whether the machine is currently a voting
// member of the replica set. It should only be called
// from the worker that maintains the replica set.
func (m *Machine) SetHasVote(hasVote bool) error {
	ops := []txn.Op{{
		C:      m.st.machines.Name,
		Id:     m.doc.Id,
		Assert: notDeadDoc,
		Update: D{{"$set", D{{"hasvote", hasVote}}}},
	}}
	if err := m.st.runTransaction(ops); err != nil {
		// An aborted transaction means the notDeadDoc assertion
		// failed, i.e. the machine is dead.
		return fmt.Errorf("cannot set HasVote of machine %v: %v", m, onAbort(err, errDead))
	}
	// Keep the cached document in sync with the database.
	m.doc.HasVote = hasVote
	return nil
}

// IsManager returns true if the machine has JobManageEnviron.
func (m *Machine) IsManager() bool {
	return hasJob(m.doc.Jobs, JobManageEnviron)
}

// IsManual returns true if the machine was manually provisioned.
func (m *Machine) IsManual() (bool, error) {
	// Apart from the bootstrap machine, manually provisioned
	// machines have a nonce prefixed with "manual:". This is
	// unique to manual provisioning.
	if strings.HasPrefix(m.doc.Nonce, "manual:") {
		return true, nil
	}
	// The bootstrap machine uses BootstrapNonce, so in that
	// case we need to check if its provider type is "manual".
	// We also check for "null", which is an alias for manual.
	if m.doc.Id == "0" {
		cfg, err := m.st.EnvironConfig()
		if err != nil {
			return false, err
		}
		t := cfg.Type()
		return t == "null" || t == "manual", nil
	}
	return false, nil
}

// AgentTools returns the tools that the agent is currently running.
// It returns an error that satisfies IsNotFound if the tools have not yet been set.
func (m *Machine) AgentTools() (*tools.Tools, error) {
	if m.doc.Tools == nil {
		return nil, errors.NotFoundf("agent tools for machine %v", m)
	}
	// Return a copy so callers cannot mutate the cached document.
	tools := *m.doc.Tools
	return &tools, nil
}

// checkVersionValidity checks whether the given version is suitable
// for passing to SetAgentVersion.
func checkVersionValidity(v version.Binary) error {
	if v.Series == "" || v.Arch == "" {
		return fmt.Errorf("empty series or arch")
	}
	return nil
}

// SetAgentVersion sets the version of juju that the agent is
// currently running.
func (m *Machine) SetAgentVersion(v version.Binary) (err error) {
	defer utils.ErrorContextf(&err, "cannot set agent version for machine %v", m)
	if err = checkVersionValidity(v); err != nil {
		return err
	}
	// Note: this local deliberately shadows the imported tools package
	// for the remainder of the function.
	tools := &tools.Tools{Version: v}
	ops := []txn.Op{{
		C:      m.st.machines.Name,
		Id:     m.doc.Id,
		Assert: notDeadDoc,
		Update: D{{"$set", D{{"tools", tools}}}},
	}}
	if err := m.st.runTransaction(ops); err != nil {
		// An abort means the notDeadDoc assertion failed: the machine
		// is dead.
		return onAbort(err, errDead)
	}
	// Keep the cached document in sync with the database.
	m.doc.Tools = tools
	return nil
}

// SetMongoPassword sets the password the agent responsible for the machine
// should use to communicate with the state servers. Previous passwords
// are invalidated.
func (m *Machine) SetMongoPassword(password string) error {
	return m.st.setMongoPassword(m.Tag(), password)
}

// SetPassword sets the password for the machine's agent.
// The password must be at least utils.MinAgentPasswordLength bytes long.
func (m *Machine) SetPassword(password string) error {
	if len(password) < utils.MinAgentPasswordLength {
		return fmt.Errorf("password is only %d bytes long, and is not a valid Agent password", len(password))
	}
	return m.setPasswordHash(utils.AgentPasswordHash(password))
}

// setPasswordHash sets the underlying password hash in the database directly
// to the value supplied. This is split out from SetPassword to allow direct
// manipulation in tests (to check for backwards compatibility).
func (m *Machine) setPasswordHash(passwordHash string) error {
	ops := []txn.Op{{
		C:      m.st.machines.Name,
		Id:     m.doc.Id,
		Assert: notDeadDoc,
		Update: D{{"$set", D{{"passwordhash", passwordHash}}}},
	}}
	if err := m.st.runTransaction(ops); err != nil {
		// An abort means the notDeadDoc assertion failed: the machine
		// is dead.
		return fmt.Errorf("cannot set password of machine %v: %v", m, onAbort(err, errDead))
	}
	// Keep the cached document in sync with the database.
	m.doc.PasswordHash = passwordHash
	return nil
}

// getPasswordHash returns the underlying PasswordHash stored in the
// database. Used by the test suite to check that the PasswordHash gets
// properly updated to new values when compatibility mode is detected.
func (m *Machine) getPasswordHash() string {
	return m.doc.PasswordHash
}

// PasswordValid returns whether the given password is valid
// for the given machine.
func (m *Machine) PasswordValid(password string) bool {
	agentHash := utils.AgentPasswordHash(password)
	if agentHash == m.doc.PasswordHash {
		return true
	}
	// In Juju 1.16 and older we used the slower password hash for unit
	// agents. So check to see if the supplied password matches the old
	// path, and if so, update it to the new mechanism.
	// We ignore any error in setting the password, as we'll just try again
	// next time.
	if utils.UserPasswordHash(password, utils.CompatSalt) == m.doc.PasswordHash {
		logger.Debugf("%s logged in with old password hash, changing to AgentPasswordHash",
			m.Tag())
		m.setPasswordHash(agentHash)
		return true
	}
	return false
}

// Destroy sets the machine lifecycle to Dying if it is Alive. It does
// nothing otherwise. Destroy will fail if the machine has principal
// units assigned, or if the machine has JobManageEnviron.
// If the machine has assigned units, Destroy will return
// a HasAssignedUnitsError.
func (m *Machine) Destroy() error {
	return m.advanceLifecycle(Dying)
}

// ForceDestroy queues the machine for complete removal, including the
// destruction of all units and containers on the machine.
func (m *Machine) ForceDestroy() error {
	if !m.IsManager() {
		ops := []txn.Op{{
			C:  m.st.machines.Name,
			Id: m.doc.Id,
			// Re-assert in the transaction that the machine has not
			// acquired JobManageEnviron since the check above.
			Assert: D{{"jobs", D{{"$nin", []MachineJob{JobManageEnviron}}}}},
		}, m.st.newCleanupOp("machine", m.doc.Id)}
		// Success (nil) and unexpected errors are returned directly;
		// an aborted transaction means the machine turned out to be a
		// manager after all, and falls through to the error below.
		if err := m.st.runTransaction(ops); err != txn.ErrAborted {
			return err
		}
	}
	return fmt.Errorf("machine %s is required by the environment", m.doc.Id)
}

// EnsureDead sets the machine lifecycle to Dead if it is Alive or Dying.
// It does nothing otherwise. EnsureDead will fail if the machine has
// principal units assigned, or if the machine has JobManageEnviron.
// If the machine has assigned units, EnsureDead will return
// a HasAssignedUnitsError.
func (m *Machine) EnsureDead() error {
	return m.advanceLifecycle(Dead)
}

// HasAssignedUnitsError indicates that a machine cannot advance its
// lifecycle because it still has units assigned to it.
type HasAssignedUnitsError struct {
	MachineId string
	UnitNames []string
}

func (e *HasAssignedUnitsError) Error() string {
	return fmt.Sprintf("machine %s has unit %q assigned", e.MachineId, e.UnitNames[0])
}

// IsHasAssignedUnitsError reports whether err is a *HasAssignedUnitsError.
func IsHasAssignedUnitsError(err error) bool {
	_, ok := err.(*HasAssignedUnitsError)
	return ok
}

// Containers returns the container ids belonging to a parent machine.
// TODO(wallyworld): move this method to a service
func (m *Machine) Containers() ([]string, error) {
	var mc machineContainers
	err := m.st.containerRefs.FindId(m.Id()).One(&mc)
	if err == nil {
		return mc.Children, nil
	}
	if err == mgo.ErrNotFound {
		return nil, errors.NotFoundf("container info for machine %v", m.Id())
	}
	return nil, err
}

// ParentId returns the Id of the host machine if this machine is a container.
func (m *Machine) ParentId() (string, bool) {
	parentId := ParentId(m.Id())
	return parentId, parentId != ""
}

// HasContainersError indicates that a machine cannot advance its
// lifecycle because it is still hosting containers.
type HasContainersError struct {
	MachineId    string
	ContainerIds []string
}

func (e *HasContainersError) Error() string {
	return fmt.Sprintf("machine %s is hosting containers %q", e.MachineId, strings.Join(e.ContainerIds, ","))
}

// IsHasContainersError reports whether err is a *HasContainersError.
func IsHasContainersError(err error) bool {
	_, ok := err.(*HasContainersError)
	return ok
}

// advanceLifecycle ensures that the machine's lifecycle is no earlier
// than the supplied value. If the machine already has that lifecycle
// value, or a later one, no changes will be made to remote state. If
// the machine has any responsibilities that preclude a valid change in
// lifecycle, it will return an error.
func (original *Machine) advanceLifecycle(life Life) (err error) {
	containers, err := original.Containers()
	if err != nil {
		return err
	}
	if len(containers) > 0 {
		return &HasContainersError{
			MachineId:    original.doc.Id,
			ContainerIds: containers,
		}
	}
	// m is the working copy; it may be replaced with a freshly loaded
	// machine on retry, while original only has its cached Life updated
	// by the deferred function below.
	m := original
	defer func() {
		if err == nil {
			// The machine's lifecycle is known to have advanced; it may be
			// known to have already advanced further than requested, in
			// which case we set the latest known valid value.
			if m == nil {
				// The refresh failed with not-found, so the machine
				// has been removed and is therefore Dead.
				life = Dead
			} else if m.doc.Life > life {
				life = m.doc.Life
			}
			original.doc.Life = life
		}
	}()
	// The update op is shared between attempts; only its assertions
	// are filled in, per attempt, below.
	op := txn.Op{
		C:      m.st.machines.Name,
		Id:     m.doc.Id,
		Update: D{{"$set", D{{"life", life}}}},
	}
	// advanceAsserts encodes the responsibilities that block a
	// lifecycle change: the manage-environ job, assigned principal
	// units, and a replica set vote.
	advanceAsserts := D{
		{"jobs", D{{"$nin", []MachineJob{JobManageEnviron}}}},
		{"$or", []D{
			{{"principals", D{{"$size", 0}}}},
			{{"principals", D{{"$exists", false}}}},
		}},
		{"hasvote", D{{"$ne", true}}},
	}
	// 3 attempts: one with original data, one with refreshed data, and a final
	// one intended to determine the cause of failure of the preceding attempt.
	for i := 0; i < 3; i++ {
		// If the transaction was aborted, grab a fresh copy of the machine data.
		// We don't write to original, because the expectation is that state-
		// changing methods only set the requested change on the receiver; a case
		// could perhaps be made that this is not a helpful convention in the
		// context of the new state API, but we maintain consistency in the
		// face of uncertainty.
		if i != 0 {
			if m, err = m.st.Machine(m.doc.Id); errors.IsNotFoundError(err) {
				return nil
			} else if err != nil {
				return err
			}
		}
		// Check that the life change is sane, and collect the assertions
		// necessary to determine that it remains so.
		switch life {
		case Dying:
			if m.doc.Life != Alive {
				return nil
			}
			op.Assert = append(advanceAsserts, isAliveDoc...)
		case Dead:
			if m.doc.Life == Dead {
				return nil
			}
			op.Assert = append(advanceAsserts, notDeadDoc...)
		default:
			panic(fmt.Errorf("cannot advance lifecycle to %v", life))
		}
		// Check that the machine does not have any responsibilities that
		// prevent a lifecycle change.
		if hasJob(m.doc.Jobs, JobManageEnviron) {
			// (NOTE: When we enable multiple JobManageEnviron machines,
			// this restriction will be lifted, but we will assert that the
			// machine is not voting)
			return fmt.Errorf("machine %s is required by the environment", m.doc.Id)
		}
		if m.doc.HasVote {
			return fmt.Errorf("machine %s is a voting replica set member", m.doc.Id)
		}
		if len(m.doc.Principals) != 0 {
			return &HasAssignedUnitsError{
				MachineId: m.doc.Id,
				UnitNames: m.doc.Principals,
			}
		}
		// Run the transaction...
		if err := m.st.runTransaction([]txn.Op{op}); err != txn.ErrAborted {
			return err
		}
		// ...and retry on abort.
	}
	// In very rare circumstances, the final iteration above will have determined
	// no cause of failure, and attempted a final transaction: if this also failed,
	// we can be sure that the machine document is changing very fast, in a somewhat
	// surprising fashion, and that it is sensible to back off for now.
	return fmt.Errorf("machine %s cannot advance lifecycle: %v", m, ErrExcessiveContention)
}

// Remove removes the machine from state. It will fail if the machine is not
// Dead.
func (m *Machine) Remove() (err error) {
	defer utils.ErrorContextf(&err, "cannot remove machine %s", m.doc.Id)
	if m.doc.Life != Dead {
		return fmt.Errorf("machine is not dead")
	}
	// Remove the machine document along with all of its associated
	// documents: instance data, status, constraints, annotations and
	// container references.
	ops := []txn.Op{
		{
			C:      m.st.machines.Name,
			Id:     m.doc.Id,
			Assert: txn.DocExists,
			Remove: true,
		},
		{
			C:      m.st.instanceData.Name,
			Id:     m.doc.Id,
			Remove: true,
		},
		removeStatusOp(m.st, m.globalKey()),
		removeConstraintsOp(m.st, m.globalKey()),
		annotationRemoveOp(m.st, m.globalKey()),
	}
	ops = append(ops, removeContainerRefOps(m.st, m.Id())...)
	// The only abort conditions in play indicate that the machine has already
	// been removed.
	return onAbort(m.st.runTransaction(ops), nil)
}

// Refresh refreshes the contents of the machine from the underlying
// state. It returns an error that satisfies IsNotFound if the machine has
// been removed.
func (m *Machine) Refresh() error {
	doc := machineDoc{}
	err := m.st.machines.FindId(m.doc.Id).One(&doc)
	if err == mgo.ErrNotFound {
		return errors.NotFoundf("machine %v", m)
	}
	if err != nil {
		return fmt.Errorf("cannot refresh machine %v: %v", m, err)
	}
	m.doc = doc
	return nil
}

// AgentAlive returns whether the respective remote agent is alive.
func (m *Machine) AgentAlive() (bool, error) {
	return m.st.pwatcher.Alive(m.globalKey())
}

// WaitAgentAlive blocks until the respective agent is alive.
func (m *Machine) WaitAgentAlive(timeout time.Duration) (err error) {
	defer utils.ErrorContextf(&err, "waiting for agent of machine %v", m)
	ch := make(chan presence.Change)
	m.st.pwatcher.Watch(m.globalKey(), ch)
	defer m.st.pwatcher.Unwatch(m.globalKey(), ch)
	// The watcher delivers the current state first, and then changes;
	// two consecutive "dead" reports should be impossible.
	for i := 0; i < 2; i++ {
		select {
		case change := <-ch:
			if change.Alive {
				return nil
			}
		case <-time.After(timeout):
			return fmt.Errorf("still not alive after timeout")
		case <-m.st.pwatcher.Dead():
			return m.st.pwatcher.Err()
		}
	}
	panic(fmt.Sprintf("presence reported dead status twice in a row for machine %v", m))
}

// SetAgentAlive signals that the agent for machine m is alive.
// It returns the started pinger.
func (m *Machine) SetAgentAlive() (*presence.Pinger, error) {
	p := presence.NewPinger(m.st.presence, m.globalKey())
	err := p.Start()
	if err != nil {
		return nil, err
	}
	return p, nil
}

// InstanceId returns the provider specific instance id for this
// machine, or a NotProvisionedError, if not set.
func (m *Machine) InstanceId() (instance.Id, error) {
	// SCHEMACHANGE
	// TODO(wallyworld) - remove this backward compatibility code when schema upgrades are possible
	// (we first check for InstanceId stored on the machineDoc)
	if m.doc.InstanceId != "" {
		return m.doc.InstanceId, nil
	}
	instData, err := getInstanceData(m.st, m.Id())
	// Missing instance data, or instance data with an empty id, both
	// mean the machine has not been provisioned yet.
	if (err == nil && instData.InstanceId == "") || errors.IsNotFoundError(err) {
		err = NotProvisionedError(m.Id())
	}
	if err != nil {
		return "", err
	}
	return instData.InstanceId, nil
}

// InstanceStatus returns the provider specific instance status for this machine,
// or a NotProvisionedError if instance is not yet provisioned.
func (m *Machine) InstanceStatus() (string, error) {
	// SCHEMACHANGE
	// InstanceId may not be stored in the instanceData doc, so we
	// get it using an API on machine which knows to look in the old
	// place if necessary.
	instId, err := m.InstanceId()
	if err != nil {
		return "", err
	}
	instData, err := getInstanceData(m.st, m.Id())
	if (err == nil && instId == "") || errors.IsNotFoundError(err) {
		err = NotProvisionedError(m.Id())
	}
	if err != nil {
		return "", err
	}
	return instData.Status, nil
}

// SetInstanceStatus sets the provider specific instance status for a machine.
// It returns a NotProvisionedError if the machine has no instance data yet.
func (m *Machine) SetInstanceStatus(status string) (err error) {
	defer utils.ErrorContextf(&err, "cannot set instance status for machine %q", m)

	// SCHEMACHANGE - we can't do this yet until the schema is updated
	// so just do a txn.DocExists for now.
	// provisioned := D{{"instanceid", D{{"$ne", ""}}}}
	ops := []txn.Op{
		{
			C:      m.st.instanceData.Name,
			Id:     m.doc.Id,
			Assert: txn.DocExists,
			Update: D{{"$set", D{{"status", status}}}},
		},
	}

	if err = m.st.runTransaction(ops); err == nil {
		return nil
	} else if err != txn.ErrAborted {
		return err
	}
	// An abort means the instanceData document does not exist, i.e.
	// the machine is not provisioned.
	return NotProvisionedError(m.Id())
}

// Units returns all the units that have been assigned to the machine,
// including each principal's subordinate units.
func (m *Machine) Units() (units []*Unit, err error) {
	defer utils.ErrorContextf(&err, "cannot get units assigned to machine %v", m)
	pudocs := []unitDoc{}
	err = m.st.units.Find(D{{"machineid", m.doc.Id}}).All(&pudocs)
	if err != nil {
		return nil, err
	}
	for _, pudoc := range pudocs {
		units = append(units, newUnit(m.st, &pudoc))
		// Also collect the subordinates of each principal unit.
		docs := []unitDoc{}
		err = m.st.units.Find(D{{"principal", pudoc.Name}}).All(&docs)
		if err != nil {
			return nil, err
		}
		for _, doc := range docs {
			units = append(units, newUnit(m.st, &doc))
		}
	}
	return units, nil
}

// SetProvisioned sets the provider specific machine id, nonce and also metadata for
// this machine. Once set, the instance id cannot be changed.
//
// When provisioning an instance, a nonce should be created and passed
// when starting it, before adding the machine to the state. This means
// that if the provisioner crashes (or its connection to the state is
// lost) after starting the instance, we can be sure that only a single
// instance will be able to act for that machine.
func (m *Machine) SetProvisioned(id instance.Id, nonce string, characteristics *instance.HardwareCharacteristics) (err error) {
	defer utils.ErrorContextf(&err, "cannot set instance data for machine %q", m)

	if id == "" || nonce == "" {
		return fmt.Errorf("instance id and nonce cannot be empty")
	}

	if characteristics == nil {
		characteristics = &instance.HardwareCharacteristics{}
	}
	instData := &instanceData{
		Id:         m.doc.Id,
		InstanceId: id,
		Arch:       characteristics.Arch,
		Mem:        characteristics.Mem,
		RootDisk:   characteristics.RootDisk,
		CpuCores:   characteristics.CpuCores,
		CpuPower:   characteristics.CpuPower,
		Tags:       characteristics.Tags,
	}
	// SCHEMACHANGE
	// TODO(wallyworld) - do not check instanceId on machineDoc after schema is upgraded
	notSetYet := D{{"instanceid", ""}, {"nonce", ""}}
	ops := []txn.Op{
		{
			C:      m.st.machines.Name,
			Id:     m.doc.Id,
			Assert: append(isAliveDoc, notSetYet...),
			Update: D{{"$set", D{{"instanceid", id}, {"nonce", nonce}}}},
		}, {
			C:      m.st.instanceData.Name,
			Id:     m.doc.Id,
			Assert: txn.DocMissing,
			Insert: instData,
		},
	}

	if err = m.st.runTransaction(ops); err == nil {
		m.doc.Nonce = nonce
		// SCHEMACHANGE
		// TODO(wallyworld) - remove this backward compatibility code when schema upgrades are possible
		// (InstanceId is stored on the instanceData document but we duplicate the value on the machineDoc.
		m.doc.InstanceId = id
		return nil
	} else if err != txn.ErrAborted {
		return err
	} else if alive, err := isAlive(m.st.machines, m.doc.Id); err != nil {
		return err
	} else if !alive {
		// The transaction aborted because the machine is no longer alive.
		return errNotAlive
	}
	// The machine is alive, so the abort must be because the instance
	// data was already set.
	return fmt.Errorf("already set")
}

// notProvisionedError records an error when a machine is not provisioned.
783 type notProvisionedError struct { 784 machineId string 785 } 786 787 func NotProvisionedError(machineId string) error { 788 return ¬ProvisionedError{machineId} 789 } 790 791 func (e *notProvisionedError) Error() string { 792 return fmt.Sprintf("machine %v is not provisioned", e.machineId) 793 } 794 795 // IsNotProvisionedError returns true if err is a notProvisionedError. 796 func IsNotProvisionedError(err error) bool { 797 _, ok := err.(*notProvisionedError) 798 return ok 799 } 800 801 // Addresses returns any hostnames and ips associated with a machine, 802 // determined both by the machine itself, and by asking the provider. 803 // 804 // The addresses returned by the provider shadow any of the addresses 805 // that the machine reported with the same address value. 806 func (m *Machine) Addresses() (addresses []instance.Address) { 807 merged := make(map[string]instance.Address) 808 for _, address := range m.doc.MachineAddresses { 809 merged[address.Value] = address.InstanceAddress() 810 } 811 for _, address := range m.doc.Addresses { 812 merged[address.Value] = address.InstanceAddress() 813 } 814 for _, address := range merged { 815 addresses = append(addresses, address) 816 } 817 return 818 } 819 820 // SetAddresses records any addresses related to the machine, sourced 821 // by asking the provider. 822 func (m *Machine) SetAddresses(addresses []instance.Address) (err error) { 823 stateAddresses := instanceAddressesToAddresses(addresses) 824 ops := []txn.Op{ 825 { 826 C: m.st.machines.Name, 827 Id: m.doc.Id, 828 Assert: notDeadDoc, 829 Update: D{{"$set", D{{"addresses", stateAddresses}}}}, 830 }, 831 } 832 833 if err = m.st.runTransaction(ops); err != nil { 834 return fmt.Errorf("cannot set addresses of machine %v: %v", m, onAbort(err, errDead)) 835 } 836 m.doc.Addresses = stateAddresses 837 return nil 838 } 839 840 // MachineAddresses returns any hostnames and ips associated with a machine, 841 // determined by asking the machine itself. 
842 func (m *Machine) MachineAddresses() (addresses []instance.Address) { 843 for _, address := range m.doc.MachineAddresses { 844 addresses = append(addresses, address.InstanceAddress()) 845 } 846 return 847 } 848 849 // SetMachineAddresses records any addresses related to the machine, sourced 850 // by asking the machine. 851 func (m *Machine) SetMachineAddresses(addresses []instance.Address) (err error) { 852 stateAddresses := instanceAddressesToAddresses(addresses) 853 ops := []txn.Op{ 854 { 855 C: m.st.machines.Name, 856 Id: m.doc.Id, 857 Assert: notDeadDoc, 858 Update: D{{"$set", D{{"machineaddresses", stateAddresses}}}}, 859 }, 860 } 861 862 if err = m.st.runTransaction(ops); err != nil { 863 return fmt.Errorf("cannot set machine addresses of machine %v: %v", m, onAbort(err, errDead)) 864 } 865 m.doc.MachineAddresses = stateAddresses 866 return nil 867 } 868 869 // CheckProvisioned returns true if the machine was provisioned with the given nonce. 870 func (m *Machine) CheckProvisioned(nonce string) bool { 871 return nonce == m.doc.Nonce && nonce != "" 872 } 873 874 // String returns a unique description of this machine. 875 func (m *Machine) String() string { 876 return m.doc.Id 877 } 878 879 // Constraints returns the exact constraints that should apply when provisioning 880 // an instance for the machine. 881 func (m *Machine) Constraints() (constraints.Value, error) { 882 return readConstraints(m.st, m.globalKey()) 883 } 884 885 // SetConstraints sets the exact constraints to apply when provisioning an 886 // instance for the machine. It will fail if the machine is Dead, or if it 887 // is already provisioned. 
func (m *Machine) SetConstraints(cons constraints.Value) (err error) {
	defer utils.ErrorContextf(&err, "cannot set constraints")
	// An empty nonce means the machine has not been provisioned yet;
	// constraints may only change before provisioning.
	notSetYet := D{{"nonce", ""}}
	ops := []txn.Op{
		{
			C:      m.st.machines.Name,
			Id:     m.doc.Id,
			Assert: append(isAliveDoc, notSetYet...),
		},
		setConstraintsOp(m.st, m.globalKey(), cons),
	}
	// 3 attempts is enough to push the ErrExcessiveContention case out of the
	// realm of plausibility: it implies local state indicating unprovisioned,
	// and remote state indicating provisioned (reasonable); but which changes
	// back to unprovisioned and then to provisioned again with *very* specific
	// timing in the course of this loop.
	for i := 0; i < 3; i++ {
		if m.doc.Life != Alive {
			return errNotAlive
		}
		if _, err := m.InstanceId(); err == nil {
			return fmt.Errorf("machine is already provisioned")
		} else if !IsNotProvisionedError(err) {
			return err
		}
		if err := m.st.runTransaction(ops); err != txn.ErrAborted {
			return err
		}
		// The transaction aborted: refresh the machine and try again.
		if m, err = m.st.Machine(m.doc.Id); err != nil {
			return err
		}
	}
	return ErrExcessiveContention
}

// Status returns the status of the machine.
func (m *Machine) Status() (status params.Status, info string, data params.StatusData, err error) {
	doc, err := getStatus(m.st, m.globalKey())
	if err != nil {
		return "", "", nil, err
	}
	status = doc.Status
	info = doc.StatusInfo
	data = doc.StatusData
	return
}

// SetStatus sets the status of the machine.
func (m *Machine) SetStatus(status params.Status, info string, data params.StatusData) error {
	doc := statusDoc{
		Status:     status,
		StatusInfo: info,
		StatusData: data,
	}
	// Validate the requested status change before touching the database.
	if err := doc.validateSet(); err != nil {
		return err
	}
	ops := []txn.Op{{
		C:      m.st.machines.Name,
		Id:     m.doc.Id,
		Assert: notDeadDoc,
	},
		updateStatusOp(m.st, m.globalKey(), doc),
	}
	if err := m.st.runTransaction(ops); err != nil {
		// An abort means the notDeadDoc assertion failed.
		return fmt.Errorf("cannot set status of machine %q: %v", m, onAbort(err, errNotAlive))
	}
	return nil
}

// Clean returns true if the machine does not have any deployed units or containers.
func (m *Machine) Clean() bool {
	return m.doc.Clean
}

// SupportedContainers returns any containers this machine is capable of hosting, and a bool
// indicating if the supported containers have been determined or not.
func (m *Machine) SupportedContainers() ([]instance.ContainerType, bool) {
	return m.doc.SupportedContainers, m.doc.SupportedContainersKnown
}

// SupportsNoContainers records the fact that this machine doesn't support any containers.
func (m *Machine) SupportsNoContainers() (err error) {
	// An empty (but non-nil) list means "known, and none supported".
	if err = m.updateSupportedContainers([]instance.ContainerType{}); err != nil {
		return err
	}
	return m.markInvalidContainers()
}

// SetSupportedContainers sets the list of containers supported by this machine.
func (m *Machine) SetSupportedContainers(containers []instance.ContainerType) (err error) {
	if len(containers) == 0 {
		return fmt.Errorf("at least one valid container type is required")
	}
	for _, container := range containers {
		if container == instance.NONE {
			return fmt.Errorf("%q is not a valid container type", container)
		}
	}
	if err = m.updateSupportedContainers(containers); err != nil {
		return err
	}
	// Flag any existing containers of an unsupported type as in error.
	return m.markInvalidContainers()
}

// isSupportedContainer reports whether container appears in
// supportedContainers.
func isSupportedContainer(container instance.ContainerType, supportedContainers []instance.ContainerType) bool {
	for _, supportedContainer := range supportedContainers {
		if supportedContainer == container {
			return true
		}
	}
	return false
}

// updateSupportedContainers sets the supported containers on this host machine.
func (m *Machine) updateSupportedContainers(supportedContainers []instance.ContainerType) (err error) {
	ops := []txn.Op{
		{
			C:      m.st.machines.Name,
			Id:     m.doc.Id,
			Assert: notDeadDoc,
			Update: D{
				{"$set", D{
					{"supportedcontainers", supportedContainers},
					{"supportedcontainersknown", true},
				}}},
		},
	}
	if err = m.st.runTransaction(ops); err != nil {
		// An abort means the notDeadDoc assertion failed: the machine
		// is dead.
		return fmt.Errorf("cannot update supported containers of machine %v: %v", m, onAbort(err, errDead))
	}
	// Keep the cached document in sync with the database.
	m.doc.SupportedContainers = supportedContainers
	m.doc.SupportedContainersKnown = true
	return nil
}

// markInvalidContainers sets the status of any container belonging to this machine
// as being in error if the container type is not supported.
1026 func (m *Machine) markInvalidContainers() error { 1027 currentContainers, err := m.Containers() 1028 if err != nil { 1029 return err 1030 } 1031 for _, containerId := range currentContainers { 1032 if !isSupportedContainer(ContainerTypeFromId(containerId), m.doc.SupportedContainers) { 1033 container, err := m.st.Machine(containerId) 1034 if err != nil { 1035 logger.Errorf("loading container %v to mark as invalid: %v", containerId, err) 1036 continue 1037 } 1038 // There should never be a circumstance where an unsupported container is started. 1039 // Nonetheless, we check and log an error if such a situation arises. 1040 status, _, _, err := container.Status() 1041 if err != nil { 1042 logger.Errorf("finding status of container %v to mark as invalid: %v", containerId, err) 1043 continue 1044 } 1045 if status == params.StatusPending { 1046 containerType := ContainerTypeFromId(containerId) 1047 container.SetStatus( 1048 params.StatusError, "unsupported container", params.StatusData{"type": containerType}) 1049 } else { 1050 logger.Errorf("unsupported container %v has unexpected status %v", containerId, status) 1051 } 1052 } 1053 } 1054 return nil 1055 }