// Source: github.com/mattyw/juju@v0.0.0-20140610034352-732aecd63861/state/machine.go

// Copyright 2012, 2013 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package state

import (
	"fmt"
	"net"
	"strings"
	"time"

	"github.com/juju/errors"
	"github.com/juju/names"
	"github.com/juju/utils"
	"github.com/juju/utils/set"
	"labix.org/v2/mgo"
	"labix.org/v2/mgo/bson"
	"labix.org/v2/mgo/txn"

	"github.com/juju/juju/constraints"
	"github.com/juju/juju/instance"
	"github.com/juju/juju/state/api/params"
	"github.com/juju/juju/state/presence"
	"github.com/juju/juju/tools"
	"github.com/juju/juju/version"
)

// Machine represents the state of a machine.
//
// A Machine holds its own copy of the machine document (see newMachine),
// so reads of cached fields do not touch the database.
type Machine struct {
	// st is the State this machine was loaded from; it is used for all
	// transactions and queries issued by Machine methods.
	st *State
	// doc is the locally cached copy of the machine document.
	doc machineDoc
	annotator
}

// MachineJob values define responsibilities that machines may be
// expected to fulfil.
type MachineJob int

// The zero value is skipped on purpose (the blank identifier takes
// iota 0), so the first real job has value 1.
const (
	_ MachineJob = iota
	JobHostUnits
	JobManageEnviron

	// Deprecated in 1.18.
	JobManageStateDeprecated
)

// jobNames maps each state-level job to its params (API) counterpart.
// It is used in both directions: ToParams looks up by key and
// MachineJobFromParams scans for a matching value.
var jobNames = map[MachineJob]params.MachineJob{
	JobHostUnits:     params.JobHostUnits,
	JobManageEnviron: params.JobManageEnviron,

	// Deprecated in 1.18.
	JobManageStateDeprecated: params.JobManageStateDeprecated,
}

// AllJobs returns all supported machine jobs.
// The deprecated JobManageStateDeprecated is not included.
func AllJobs() []MachineJob {
	return []MachineJob{JobHostUnits, JobManageEnviron}
}

// ToParams returns the job as params.MachineJob.
// A job with no entry in jobNames is rendered as "<unknown job N>",
// where N is its integer value.
func (job MachineJob) ToParams() params.MachineJob {
	if paramsJob, ok := jobNames[job]; ok {
		return paramsJob
	}
	return params.MachineJob(fmt.Sprintf("<unknown job %d>", int(job)))
}

// MachineJobFromParams returns the job corresponding to params.MachineJob.
70 func MachineJobFromParams(job params.MachineJob) (MachineJob, error) { 71 for machineJob, paramJob := range jobNames { 72 if paramJob == job { 73 return machineJob, nil 74 } 75 } 76 return -1, fmt.Errorf("invalid machine job %q", job) 77 } 78 79 // paramsJobsFromJobs converts state jobs to params jobs. 80 func paramsJobsFromJobs(jobs []MachineJob) []params.MachineJob { 81 paramsJobs := make([]params.MachineJob, len(jobs)) 82 for i, machineJob := range jobs { 83 paramsJobs[i] = machineJob.ToParams() 84 } 85 return paramsJobs 86 } 87 88 func (job MachineJob) String() string { 89 return string(job.ToParams()) 90 } 91 92 // machineDoc represents the internal state of a machine in MongoDB. 93 // Note the correspondence with MachineInfo in state/api/params. 94 type machineDoc struct { 95 Id string `bson:"_id"` 96 Nonce string 97 Series string 98 ContainerType string 99 Principals []string 100 Life Life 101 Tools *tools.Tools `bson:",omitempty"` 102 Jobs []MachineJob 103 NoVote bool 104 HasVote bool 105 PasswordHash string 106 Clean bool 107 // We store 2 different sets of addresses for the machine, obtained 108 // from different sources. 109 // Addresses is the set of addresses obtained by asking the provider. 110 Addresses []address 111 // MachineAddresses is the set of addresses obtained from the machine itself. 112 MachineAddresses []address 113 // The SupportedContainers attributes are used to advertise what containers this 114 // machine is capable of hosting. 115 SupportedContainersKnown bool 116 SupportedContainers []instance.ContainerType `bson:",omitempty"` 117 // Placement is the placement directive that should be used when provisioning 118 // an instance for the machine. 119 Placement string `bson:",omitempty"` 120 // Deprecated. InstanceId, now lives on instanceData. 121 // This attribute is retained so that data from existing machines can be read. 122 // SCHEMACHANGE 123 // TODO(wallyworld): remove this attribute when schema upgrades are possible. 
124 InstanceId instance.Id 125 } 126 127 func newMachine(st *State, doc *machineDoc) *Machine { 128 machine := &Machine{ 129 st: st, 130 doc: *doc, 131 } 132 machine.annotator = annotator{ 133 globalKey: machine.globalKey(), 134 tag: machine.Tag(), 135 st: st, 136 } 137 return machine 138 } 139 140 // Id returns the machine id. 141 func (m *Machine) Id() string { 142 return m.doc.Id 143 } 144 145 // Series returns the operating system series running on the machine. 146 func (m *Machine) Series() string { 147 return m.doc.Series 148 } 149 150 // ContainerType returns the type of container hosting this machine. 151 func (m *Machine) ContainerType() instance.ContainerType { 152 return instance.ContainerType(m.doc.ContainerType) 153 } 154 155 // machineGlobalKey returns the global database key for the identified machine. 156 func machineGlobalKey(id string) string { 157 return "m#" + id 158 } 159 160 // globalKey returns the global database key for the machine. 161 func (m *Machine) globalKey() string { 162 return machineGlobalKey(m.doc.Id) 163 } 164 165 // instanceData holds attributes relevant to a provisioned machine. 166 type instanceData struct { 167 Id string `bson:"_id"` 168 InstanceId instance.Id `bson:"instanceid"` 169 Status string `bson:"status,omitempty"` 170 Arch *string `bson:"arch,omitempty"` 171 Mem *uint64 `bson:"mem,omitempty"` 172 RootDisk *uint64 `bson:"rootdisk,omitempty"` 173 CpuCores *uint64 `bson:"cpucores,omitempty"` 174 CpuPower *uint64 `bson:"cpupower,omitempty"` 175 Tags *[]string `bson:"tags,omitempty"` 176 } 177 178 func hardwareCharacteristics(instData instanceData) *instance.HardwareCharacteristics { 179 return &instance.HardwareCharacteristics{ 180 Arch: instData.Arch, 181 Mem: instData.Mem, 182 RootDisk: instData.RootDisk, 183 CpuCores: instData.CpuCores, 184 CpuPower: instData.CpuPower, 185 Tags: instData.Tags, 186 } 187 } 188 189 // TODO(wallyworld): move this method to a service. 
190 func (m *Machine) HardwareCharacteristics() (*instance.HardwareCharacteristics, error) { 191 instData, err := getInstanceData(m.st, m.Id()) 192 if err != nil { 193 return nil, err 194 } 195 return hardwareCharacteristics(instData), nil 196 } 197 198 func getInstanceData(st *State, id string) (instanceData, error) { 199 var instData instanceData 200 err := st.instanceData.FindId(id).One(&instData) 201 if err == mgo.ErrNotFound { 202 return instanceData{}, errors.NotFoundf("instance data for machine %v", id) 203 } 204 if err != nil { 205 return instanceData{}, fmt.Errorf("cannot get instance data for machine %v: %v", id, err) 206 } 207 return instData, nil 208 } 209 210 // Tag returns a name identifying the machine that is safe to use 211 // as a file name. The returned name will be different from other 212 // Tag values returned by any other entities from the same state. 213 func (m *Machine) Tag() string { 214 return names.MachineTag(m.Id()) 215 } 216 217 // Life returns whether the machine is Alive, Dying or Dead. 218 func (m *Machine) Life() Life { 219 return m.doc.Life 220 } 221 222 // Jobs returns the responsibilities that must be fulfilled by m's agent. 223 func (m *Machine) Jobs() []MachineJob { 224 return m.doc.Jobs 225 } 226 227 // WantsVote reports whether the machine is a state server 228 // that wants to take part in peer voting. 229 func (m *Machine) WantsVote() bool { 230 return hasJob(m.doc.Jobs, JobManageEnviron) && !m.doc.NoVote 231 } 232 233 // HasVote reports whether that machine is currently a voting 234 // member of the replica set. 235 func (m *Machine) HasVote() bool { 236 return m.doc.HasVote 237 } 238 239 // SetHasVote sets whether the machine is currently a voting 240 // member of the replica set. It should only be called 241 // from the worker that maintains the replica set. 
242 func (m *Machine) SetHasVote(hasVote bool) error { 243 ops := []txn.Op{{ 244 C: m.st.machines.Name, 245 Id: m.doc.Id, 246 Assert: notDeadDoc, 247 Update: bson.D{{"$set", bson.D{{"hasvote", hasVote}}}}, 248 }} 249 if err := m.st.runTransaction(ops); err != nil { 250 return fmt.Errorf("cannot set HasVote of machine %v: %v", m, onAbort(err, errDead)) 251 } 252 m.doc.HasVote = hasVote 253 return nil 254 } 255 256 // IsManager returns true if the machine has JobManageEnviron. 257 func (m *Machine) IsManager() bool { 258 return hasJob(m.doc.Jobs, JobManageEnviron) 259 } 260 261 // IsManual returns true if the machine was manually provisioned. 262 func (m *Machine) IsManual() (bool, error) { 263 // Apart from the bootstrap machine, manually provisioned 264 // machines have a nonce prefixed with "manual:". This is 265 // unique to manual provisioning. 266 if strings.HasPrefix(m.doc.Nonce, "manual:") { 267 return true, nil 268 } 269 // The bootstrap machine uses BootstrapNonce, so in that 270 // case we need to check if its provider type is "manual". 271 // We also check for "null", which is an alias for manual. 272 if m.doc.Id == "0" { 273 cfg, err := m.st.EnvironConfig() 274 if err != nil { 275 return false, err 276 } 277 t := cfg.Type() 278 return t == "null" || t == "manual", nil 279 } 280 return false, nil 281 } 282 283 // AgentTools returns the tools that the agent is currently running. 284 // It returns an error that satisfies errors.IsNotFound if the tools 285 // have not yet been set. 286 func (m *Machine) AgentTools() (*tools.Tools, error) { 287 if m.doc.Tools == nil { 288 return nil, errors.NotFoundf("agent tools for machine %v", m) 289 } 290 tools := *m.doc.Tools 291 return &tools, nil 292 } 293 294 // checkVersionValidity checks whether the given version is suitable 295 // for passing to SetAgentVersion. 
296 func checkVersionValidity(v version.Binary) error { 297 if v.Series == "" || v.Arch == "" { 298 return fmt.Errorf("empty series or arch") 299 } 300 return nil 301 } 302 303 // SetAgentVersion sets the version of juju that the agent is 304 // currently running. 305 func (m *Machine) SetAgentVersion(v version.Binary) (err error) { 306 defer errors.Maskf(&err, "cannot set agent version for machine %v", m) 307 if err = checkVersionValidity(v); err != nil { 308 return err 309 } 310 tools := &tools.Tools{Version: v} 311 ops := []txn.Op{{ 312 C: m.st.machines.Name, 313 Id: m.doc.Id, 314 Assert: notDeadDoc, 315 Update: bson.D{{"$set", bson.D{{"tools", tools}}}}, 316 }} 317 if err := m.st.runTransaction(ops); err != nil { 318 return onAbort(err, errDead) 319 } 320 m.doc.Tools = tools 321 return nil 322 } 323 324 // SetMongoPassword sets the password the agent responsible for the machine 325 // should use to communicate with the state servers. Previous passwords 326 // are invalidated. 327 func (m *Machine) SetMongoPassword(password string) error { 328 return m.st.setMongoPassword(m.Tag(), password) 329 } 330 331 // SetPassword sets the password for the machine's agent. 332 func (m *Machine) SetPassword(password string) error { 333 if len(password) < utils.MinAgentPasswordLength { 334 return fmt.Errorf("password is only %d bytes long, and is not a valid Agent password", len(password)) 335 } 336 return m.setPasswordHash(utils.AgentPasswordHash(password)) 337 } 338 339 // setPasswordHash sets the underlying password hash in the database directly 340 // to the value supplied. This is split out from SetPassword to allow direct 341 // manipulation in tests (to check for backwards compatibility). 
342 func (m *Machine) setPasswordHash(passwordHash string) error { 343 ops := []txn.Op{{ 344 C: m.st.machines.Name, 345 Id: m.doc.Id, 346 Assert: notDeadDoc, 347 Update: bson.D{{"$set", bson.D{{"passwordhash", passwordHash}}}}, 348 }} 349 if err := m.st.runTransaction(ops); err != nil { 350 return fmt.Errorf("cannot set password of machine %v: %v", m, onAbort(err, errDead)) 351 } 352 m.doc.PasswordHash = passwordHash 353 return nil 354 } 355 356 // Return the underlying PasswordHash stored in the database. Used by the test 357 // suite to check that the PasswordHash gets properly updated to new values 358 // when compatibility mode is detected. 359 func (m *Machine) getPasswordHash() string { 360 return m.doc.PasswordHash 361 } 362 363 // PasswordValid returns whether the given password is valid 364 // for the given machine. 365 func (m *Machine) PasswordValid(password string) bool { 366 agentHash := utils.AgentPasswordHash(password) 367 if agentHash == m.doc.PasswordHash { 368 return true 369 } 370 // In Juju 1.16 and older we used the slower password hash for unit 371 // agents. So check to see if the supplied password matches the old 372 // path, and if so, update it to the new mechanism. 373 // We ignore any error in setting the password, as we'll just try again 374 // next time 375 if utils.UserPasswordHash(password, utils.CompatSalt) == m.doc.PasswordHash { 376 logger.Debugf("%s logged in with old password hash, changing to AgentPasswordHash", 377 m.Tag()) 378 m.setPasswordHash(agentHash) 379 return true 380 } 381 return false 382 } 383 384 // Destroy sets the machine lifecycle to Dying if it is Alive. It does 385 // nothing otherwise. Destroy will fail if the machine has principal 386 // units assigned, or if the machine has JobManageEnviron. 387 // If the machine has assigned units, Destroy will return 388 // a HasAssignedUnitsError. 
389 func (m *Machine) Destroy() error { 390 return m.advanceLifecycle(Dying) 391 } 392 393 // ForceDestroy queues the machine for complete removal, including the 394 // destruction of all units and containers on the machine. 395 func (m *Machine) ForceDestroy() error { 396 if !m.IsManager() { 397 ops := []txn.Op{{ 398 C: m.st.machines.Name, 399 Id: m.doc.Id, 400 Assert: bson.D{{"jobs", bson.D{{"$nin", []MachineJob{JobManageEnviron}}}}}, 401 }, m.st.newCleanupOp(cleanupForceDestroyedMachine, m.doc.Id)} 402 if err := m.st.runTransaction(ops); err != txn.ErrAborted { 403 return err 404 } 405 } 406 return fmt.Errorf("machine %s is required by the environment", m.doc.Id) 407 } 408 409 // EnsureDead sets the machine lifecycle to Dead if it is Alive or Dying. 410 // It does nothing otherwise. EnsureDead will fail if the machine has 411 // principal units assigned, or if the machine has JobManageEnviron. 412 // If the machine has assigned units, EnsureDead will return 413 // a HasAssignedUnitsError. 414 func (m *Machine) EnsureDead() error { 415 return m.advanceLifecycle(Dead) 416 } 417 418 type HasAssignedUnitsError struct { 419 MachineId string 420 UnitNames []string 421 } 422 423 func (e *HasAssignedUnitsError) Error() string { 424 return fmt.Sprintf("machine %s has unit %q assigned", e.MachineId, e.UnitNames[0]) 425 } 426 427 func IsHasAssignedUnitsError(err error) bool { 428 _, ok := err.(*HasAssignedUnitsError) 429 return ok 430 } 431 432 // Containers returns the container ids belonging to a parent machine. 
// TODO(wallyworld): move this method to a service
//
// A missing container-reference document is reported with an error
// created by errors.NotFoundf; other lookup failures are returned as-is.
func (m *Machine) Containers() ([]string, error) {
	var mc machineContainers
	err := m.st.containerRefs.FindId(m.Id()).One(&mc)
	if err == nil {
		return mc.Children, nil
	}
	if err == mgo.ErrNotFound {
		return nil, errors.NotFoundf("container info for machine %v", m.Id())
	}
	return nil, err
}

// ParentId returns the Id of the host machine if this machine is a container.
// The boolean result is false (and the id empty) when the package-level
// ParentId helper yields no parent for this machine's id.
func (m *Machine) ParentId() (string, bool) {
	parentId := ParentId(m.Id())
	return parentId, parentId != ""
}

// HasContainersError is returned when a lifecycle change is refused
// because the machine is still hosting containers.
type HasContainersError struct {
	MachineId    string
	ContainerIds []string
}

// Error implements the error interface, listing all container ids
// joined by commas.
func (e *HasContainersError) Error() string {
	return fmt.Sprintf("machine %s is hosting containers %q", e.MachineId, strings.Join(e.ContainerIds, ","))
}

// IsHasContainersError reports whether err is a *HasContainersError.
func IsHasContainersError(err error) bool {
	_, ok := err.(*HasContainersError)
	return ok
}

// advanceLifecycle ensures that the machine's lifecycle is no earlier
// than the supplied value. If the machine already has that lifecycle
// value, or a later one, no changes will be made to remote state. If
// the machine has any responsibilities that preclude a valid change in
// lifecycle, it will return an error.
//
// Only Dying and Dead are accepted for life; any other value panics.
// Possible errors include *HasContainersError, *HasAssignedUnitsError,
// errors for state-server/voting machines, and an error wrapping
// ErrExcessiveContention when all attempts are aborted.
func (original *Machine) advanceLifecycle(life Life) (err error) {
	// Refuse outright while the machine still hosts containers. Note that
	// any error from Containers (including not-found) is returned as-is.
	containers, err := original.Containers()
	if err != nil {
		return err
	}
	if len(containers) > 0 {
		return &HasContainersError{
			MachineId:    original.doc.Id,
			ContainerIds: containers,
		}
	}
	// m is the working copy; after an aborted attempt it is replaced by a
	// freshly loaded machine, while original is only touched by the defer.
	m := original
	defer func() {
		if err == nil {
			// The machine's lifecycle is known to have advanced; it may be
			// known to have already advanced further than requested, in
			// which case we set the latest known valid value.
			if m == nil {
				// NOTE(review): m is presumably nil only when the refresh
				// below reported not-found, i.e. the machine is gone.
				life = Dead
			} else if m.doc.Life > life {
				life = m.doc.Life
			}
			original.doc.Life = life
		}
	}()
	// op is the lifecycle update itself; its Assert is filled in on every
	// attempt below, combining advanceAsserts with a life check.
	op := txn.Op{
		C:      m.st.machines.Name,
		Id:     m.doc.Id,
		Update: bson.D{{"$set", bson.D{{"life", life}}}},
	}
	// advanceAsserts mirrors, inside the transaction, the local checks made
	// further down: no JobManageEnviron, no principals, and no vote.
	advanceAsserts := bson.D{
		{"jobs", bson.D{{"$nin", []MachineJob{JobManageEnviron}}}},
		{"$or", []bson.D{
			{{"principals", bson.D{{"$size", 0}}}},
			{{"principals", bson.D{{"$exists", false}}}},
		}},
		{"hasvote", bson.D{{"$ne", true}}},
	}
	// 3 attempts: one with original data, one with refreshed data, and a final
	// one intended to determine the cause of failure of the preceding attempt.
	for i := 0; i < 3; i++ {
		// If the transaction was aborted, grab a fresh copy of the machine data.
		// We don't write to original, because the expectation is that state-
		// changing methods only set the requested change on the receiver; a case
		// could perhaps be made that this is not a helpful convention in the
		// context of the new state API, but we maintain consistency in the
		// face of uncertainty.
		if i != 0 {
			// A machine that can no longer be found counts as success.
			if m, err = m.st.Machine(m.doc.Id); errors.IsNotFound(err) {
				return nil
			} else if err != nil {
				return err
			}
		}
		// Check that the life change is sane, and collect the assertions
		// necessary to determine that it remains so.
		switch life {
		case Dying:
			if m.doc.Life != Alive {
				return nil
			}
			op.Assert = append(advanceAsserts, isAliveDoc...)
		case Dead:
			if m.doc.Life == Dead {
				return nil
			}
			op.Assert = append(advanceAsserts, notDeadDoc...)
		default:
			panic(fmt.Errorf("cannot advance lifecycle to %v", life))
		}
		// Check that the machine does not have any responsibilities that
		// prevent a lifecycle change.
		if hasJob(m.doc.Jobs, JobManageEnviron) {
			// (NOTE: When we enable multiple JobManageEnviron machines,
			// this restriction will be lifted, but we will assert that the
			// machine is not voting)
			return fmt.Errorf("machine %s is required by the environment", m.doc.Id)
		}
		if m.doc.HasVote {
			return fmt.Errorf("machine %s is a voting replica set member", m.doc.Id)
		}
		if len(m.doc.Principals) != 0 {
			return &HasAssignedUnitsError{
				MachineId: m.doc.Id,
				UnitNames: m.doc.Principals,
			}
		}
		// Run the transaction...
		if err := m.st.runTransaction([]txn.Op{op}); err != txn.ErrAborted {
			return err
		}
		// ...and retry on abort.
	}
	// In very rare circumstances, the final iteration above will have determined
	// no cause of failure, and attempted a final transaction: if this also failed,
	// we can be sure that the machine document is changing very fast, in a somewhat
	// surprising fashion, and that it is sensible to back off for now.
	return fmt.Errorf("machine %s cannot advance lifecycle: %v", m, ErrExcessiveContention)
}

// removeNetworkInterfacesOps returns the transaction operations needed to
// remove every network interface document recorded for this machine,
// preceded by an assertion that the machine is Dead. It fails without
// querying if the locally cached life is not Dead.
func (m *Machine) removeNetworkInterfacesOps() ([]txn.Op, error) {
	if m.doc.Life != Dead {
		return nil, fmt.Errorf("machine is not dead")
	}
	var doc networkInterfaceDoc
	ops := []txn.Op{{
		C:      m.st.machines.Name,
		Id:     m.doc.Id,
		Assert: isDeadDoc,
	}}
	// Only the _id of each interface is needed to build a remove op.
	sel := bson.D{{"machineid", m.doc.Id}}
	iter := m.st.networkInterfaces.Find(sel).Select(bson.D{{"_id", 1}}).Iter()
	for iter.Next(&doc) {
		ops = append(ops, txn.Op{
			C:      m.st.networkInterfaces.Name,
			Id:     doc.Id,
			Remove: true,
		})
	}
	if err := iter.Err(); err != nil {
		return nil, err
	}
	return ops, nil
}

// Remove removes the machine from state. It will fail if the machine
// is not Dead.
599 func (m *Machine) Remove() (err error) { 600 defer errors.Maskf(&err, "cannot remove machine %s", m.doc.Id) 601 if m.doc.Life != Dead { 602 return fmt.Errorf("machine is not dead") 603 } 604 ops := []txn.Op{ 605 { 606 C: m.st.machines.Name, 607 Id: m.doc.Id, 608 Assert: txn.DocExists, 609 Remove: true, 610 }, 611 { 612 C: m.st.machines.Name, 613 Id: m.doc.Id, 614 Assert: isDeadDoc, 615 }, 616 { 617 C: m.st.instanceData.Name, 618 Id: m.doc.Id, 619 Remove: true, 620 }, 621 removeStatusOp(m.st, m.globalKey()), 622 removeConstraintsOp(m.st, m.globalKey()), 623 removeRequestedNetworksOp(m.st, m.globalKey()), 624 annotationRemoveOp(m.st, m.globalKey()), 625 } 626 ifacesOps, err := m.removeNetworkInterfacesOps() 627 if err != nil { 628 return err 629 } 630 ops = append(ops, ifacesOps...) 631 ops = append(ops, removeContainerRefOps(m.st, m.Id())...) 632 // The only abort conditions in play indicate that the machine has already 633 // been removed. 634 return onAbort(m.st.runTransaction(ops), nil) 635 } 636 637 // Refresh refreshes the contents of the machine from the underlying 638 // state. It returns an error that satisfies errors.IsNotFound if the 639 // machine has been removed. 640 func (m *Machine) Refresh() error { 641 doc := machineDoc{} 642 err := m.st.machines.FindId(m.doc.Id).One(&doc) 643 if err == mgo.ErrNotFound { 644 return errors.NotFoundf("machine %v", m) 645 } 646 if err != nil { 647 return fmt.Errorf("cannot refresh machine %v: %v", m, err) 648 } 649 m.doc = doc 650 return nil 651 } 652 653 // AgentAlive returns whether the respective remote agent is alive. 654 func (m *Machine) AgentAlive() (bool, error) { 655 return m.st.pwatcher.Alive(m.globalKey()) 656 } 657 658 // WaitAgentAlive blocks until the respective agent is alive. 
659 func (m *Machine) WaitAgentAlive(timeout time.Duration) (err error) { 660 defer errors.Maskf(&err, "waiting for agent of machine %v", m) 661 ch := make(chan presence.Change) 662 m.st.pwatcher.Watch(m.globalKey(), ch) 663 defer m.st.pwatcher.Unwatch(m.globalKey(), ch) 664 for i := 0; i < 2; i++ { 665 select { 666 case change := <-ch: 667 if change.Alive { 668 return nil 669 } 670 case <-time.After(timeout): 671 return fmt.Errorf("still not alive after timeout") 672 case <-m.st.pwatcher.Dead(): 673 return m.st.pwatcher.Err() 674 } 675 } 676 panic(fmt.Sprintf("presence reported dead status twice in a row for machine %v", m)) 677 } 678 679 // SetAgentAlive signals that the agent for machine m is alive. 680 // It returns the started pinger. 681 func (m *Machine) SetAgentAlive() (*presence.Pinger, error) { 682 p := presence.NewPinger(m.st.presence, m.globalKey()) 683 err := p.Start() 684 if err != nil { 685 return nil, err 686 } 687 return p, nil 688 } 689 690 // InstanceId returns the provider specific instance id for this 691 // machine, or a NotProvisionedError, if not set. 692 func (m *Machine) InstanceId() (instance.Id, error) { 693 // SCHEMACHANGE 694 // TODO(wallyworld) - remove this backward compatibility code when schema upgrades are possible 695 // (we first check for InstanceId stored on the machineDoc) 696 if m.doc.InstanceId != "" { 697 return m.doc.InstanceId, nil 698 } 699 instData, err := getInstanceData(m.st, m.Id()) 700 if (err == nil && instData.InstanceId == "") || errors.IsNotFound(err) { 701 err = NotProvisionedError(m.Id()) 702 } 703 if err != nil { 704 return "", err 705 } 706 return instData.InstanceId, nil 707 } 708 709 // InstanceStatus returns the provider specific instance status for this machine, 710 // or a NotProvisionedError if instance is not yet provisioned. 
711 func (m *Machine) InstanceStatus() (string, error) { 712 // SCHEMACHANGE 713 // InstanceId may not be stored in the instanceData doc, so we 714 // get it using an API on machine which knows to look in the old 715 // place if necessary. 716 instId, err := m.InstanceId() 717 if err != nil { 718 return "", err 719 } 720 instData, err := getInstanceData(m.st, m.Id()) 721 if (err == nil && instId == "") || errors.IsNotFound(err) { 722 err = NotProvisionedError(m.Id()) 723 } 724 if err != nil { 725 return "", err 726 } 727 return instData.Status, nil 728 } 729 730 // SetInstanceStatus sets the provider specific instance status for a machine. 731 func (m *Machine) SetInstanceStatus(status string) (err error) { 732 defer errors.Maskf(&err, "cannot set instance status for machine %q", m) 733 734 // SCHEMACHANGE - we can't do this yet until the schema is updated 735 // so just do a txn.DocExists for now. 736 // provisioned := bson.D{{"instanceid", bson.D{{"$ne", ""}}}} 737 ops := []txn.Op{ 738 { 739 C: m.st.instanceData.Name, 740 Id: m.doc.Id, 741 Assert: txn.DocExists, 742 Update: bson.D{{"$set", bson.D{{"status", status}}}}, 743 }, 744 } 745 746 if err = m.st.runTransaction(ops); err == nil { 747 return nil 748 } else if err != txn.ErrAborted { 749 return err 750 } 751 return NotProvisionedError(m.Id()) 752 } 753 754 // Units returns all the units that have been assigned to the machine. 
755 func (m *Machine) Units() (units []*Unit, err error) { 756 defer errors.Maskf(&err, "cannot get units assigned to machine %v", m) 757 pudocs := []unitDoc{} 758 err = m.st.units.Find(bson.D{{"machineid", m.doc.Id}}).All(&pudocs) 759 if err != nil { 760 return nil, err 761 } 762 for _, pudoc := range pudocs { 763 units = append(units, newUnit(m.st, &pudoc)) 764 docs := []unitDoc{} 765 err = m.st.units.Find(bson.D{{"principal", pudoc.Name}}).All(&docs) 766 if err != nil { 767 return nil, err 768 } 769 for _, doc := range docs { 770 units = append(units, newUnit(m.st, &doc)) 771 } 772 } 773 return units, nil 774 } 775 776 // SetProvisioned sets the provider specific machine id, nonce and also metadata for 777 // this machine. Once set, the instance id cannot be changed. 778 // 779 // When provisioning an instance, a nonce should be created and passed 780 // when starting it, before adding the machine to the state. This means 781 // that if the provisioner crashes (or its connection to the state is 782 // lost) after starting the instance, we can be sure that only a single 783 // instance will be able to act for that machine. 
784 func (m *Machine) SetProvisioned(id instance.Id, nonce string, characteristics *instance.HardwareCharacteristics) (err error) { 785 defer errors.Maskf(&err, "cannot set instance data for machine %q", m) 786 787 if id == "" || nonce == "" { 788 return fmt.Errorf("instance id and nonce cannot be empty") 789 } 790 791 if characteristics == nil { 792 characteristics = &instance.HardwareCharacteristics{} 793 } 794 instData := &instanceData{ 795 Id: m.doc.Id, 796 InstanceId: id, 797 Arch: characteristics.Arch, 798 Mem: characteristics.Mem, 799 RootDisk: characteristics.RootDisk, 800 CpuCores: characteristics.CpuCores, 801 CpuPower: characteristics.CpuPower, 802 Tags: characteristics.Tags, 803 } 804 // SCHEMACHANGE 805 // TODO(wallyworld) - do not check instanceId on machineDoc after schema is upgraded 806 notSetYet := bson.D{{"instanceid", ""}, {"nonce", ""}} 807 ops := []txn.Op{ 808 { 809 C: m.st.machines.Name, 810 Id: m.doc.Id, 811 Assert: append(isAliveDoc, notSetYet...), 812 Update: bson.D{{"$set", bson.D{{"instanceid", id}, {"nonce", nonce}}}}, 813 }, { 814 C: m.st.instanceData.Name, 815 Id: m.doc.Id, 816 Assert: txn.DocMissing, 817 Insert: instData, 818 }, 819 } 820 821 if err = m.st.runTransaction(ops); err == nil { 822 m.doc.Nonce = nonce 823 // SCHEMACHANGE 824 // TODO(wallyworld) - remove this backward compatibility code when schema upgrades are possible 825 // (InstanceId is stored on the instanceData document but we duplicate the value on the machineDoc. 826 m.doc.InstanceId = id 827 return nil 828 } else if err != txn.ErrAborted { 829 return err 830 } else if alive, err := isAlive(m.st.machines, m.doc.Id); err != nil { 831 return err 832 } else if !alive { 833 return errNotAlive 834 } 835 return fmt.Errorf("already set") 836 } 837 838 // SetInstanceInfo is used to provision a machine and in one steps set 839 // it's instance id, nonce, hardware characteristics, add networks and 840 // network interfaces as needed. 
841 // 842 // TODO(dimitern) Do all the operations described in a single 843 // transaction, rather than using separate calls. Alternatively, 844 // we can add all the things to create/set in a document in some 845 // collection and have a worker that takes care of the actual work. 846 // Merge SetProvisioned() in here or drop it at that point. 847 func (m *Machine) SetInstanceInfo( 848 id instance.Id, nonce string, characteristics *instance.HardwareCharacteristics, 849 networks []NetworkInfo, interfaces []NetworkInterfaceInfo) error { 850 851 // Add the networks and interfaces first. 852 for _, network := range networks { 853 _, err := m.st.AddNetwork(network) 854 if err != nil && errors.IsAlreadyExists(err) { 855 // Ignore already existing networks. 856 continue 857 } else if err != nil { 858 return err 859 } 860 } 861 for _, iface := range interfaces { 862 _, err := m.AddNetworkInterface(iface) 863 if err != nil && errors.IsAlreadyExists(err) { 864 // Ignore already existing network interfaces. 865 continue 866 } else if err != nil { 867 return err 868 } 869 } 870 return m.SetProvisioned(id, nonce, characteristics) 871 } 872 873 // notProvisionedError records an error when a machine is not provisioned. 874 type notProvisionedError struct { 875 machineId string 876 } 877 878 func NotProvisionedError(machineId string) error { 879 return ¬ProvisionedError{machineId} 880 } 881 882 func (e *notProvisionedError) Error() string { 883 return fmt.Sprintf("machine %v is not provisioned", e.machineId) 884 } 885 886 // IsNotProvisionedError returns true if err is a notProvisionedError. 
887 func IsNotProvisionedError(err error) bool { 888 _, ok := err.(*notProvisionedError) 889 return ok 890 } 891 892 func mergedAddresses(machineAddresses, providerAddresses []address) []instance.Address { 893 merged := make([]instance.Address, len(providerAddresses), len(providerAddresses)+len(machineAddresses)) 894 var providerValues set.Strings 895 for i, address := range providerAddresses { 896 providerValues.Add(address.Value) 897 merged[i] = address.InstanceAddress() 898 } 899 for _, address := range machineAddresses { 900 if !providerValues.Contains(address.Value) { 901 merged = append(merged, address.InstanceAddress()) 902 } 903 } 904 return merged 905 } 906 907 // Addresses returns any hostnames and ips associated with a machine, 908 // determined both by the machine itself, and by asking the provider. 909 // 910 // The addresses returned by the provider shadow any of the addresses 911 // that the machine reported with the same address value. Provider-reported 912 // addresses always come before machine-reported addresses. 913 func (m *Machine) Addresses() (addresses []instance.Address) { 914 return mergedAddresses(m.doc.MachineAddresses, m.doc.Addresses) 915 } 916 917 // SetAddresses records any addresses related to the machine, sourced 918 // by asking the provider. 919 func (m *Machine) SetAddresses(addresses ...instance.Address) (err error) { 920 stateAddresses := instanceAddressesToAddresses(addresses) 921 ops := []txn.Op{ 922 { 923 C: m.st.machines.Name, 924 Id: m.doc.Id, 925 Assert: notDeadDoc, 926 Update: bson.D{{"$set", bson.D{{"addresses", stateAddresses}}}}, 927 }, 928 } 929 930 if err = m.st.runTransaction(ops); err != nil { 931 return fmt.Errorf("cannot set addresses of machine %v: %v", m, onAbort(err, errDead)) 932 } 933 m.doc.Addresses = stateAddresses 934 return nil 935 } 936 937 // MachineAddresses returns any hostnames and ips associated with a machine, 938 // determined by asking the machine itself. 
939 func (m *Machine) MachineAddresses() (addresses []instance.Address) { 940 for _, address := range m.doc.MachineAddresses { 941 addresses = append(addresses, address.InstanceAddress()) 942 } 943 return 944 } 945 946 // SetMachineAddresses records any addresses related to the machine, sourced 947 // by asking the machine. 948 func (m *Machine) SetMachineAddresses(addresses ...instance.Address) (err error) { 949 stateAddresses := instanceAddressesToAddresses(addresses) 950 ops := []txn.Op{ 951 { 952 C: m.st.machines.Name, 953 Id: m.doc.Id, 954 Assert: notDeadDoc, 955 Update: bson.D{{"$set", bson.D{{"machineaddresses", stateAddresses}}}}, 956 }, 957 } 958 959 if err = m.st.runTransaction(ops); err != nil { 960 return fmt.Errorf("cannot set machine addresses of machine %v: %v", m, onAbort(err, errDead)) 961 } 962 m.doc.MachineAddresses = stateAddresses 963 return nil 964 } 965 966 // RequestedNetworks returns the list of network names the machine 967 // should be on. Unlike networks specified with constraints, these 968 // networks are required to be present on the machine. 969 func (m *Machine) RequestedNetworks() ([]string, error) { 970 return readRequestedNetworks(m.st, m.globalKey()) 971 } 972 973 // Networks returns the list of configured networks on the machine. 974 // The configured and requested networks on a machine must match. 975 func (m *Machine) Networks() ([]*Network, error) { 976 requestedNetworks, err := m.RequestedNetworks() 977 if err != nil { 978 return nil, err 979 } 980 docs := []networkDoc{} 981 sel := bson.D{{"_id", bson.D{{"$in", requestedNetworks}}}} 982 err = m.st.networks.Find(sel).All(&docs) 983 if err != nil { 984 return nil, err 985 } 986 networks := make([]*Network, len(docs)) 987 for i, doc := range docs { 988 networks[i] = newNetwork(m.st, &doc) 989 } 990 return networks, nil 991 } 992 993 // NetworkInterfaces returns the list of configured network interfaces 994 // of the machine. 
995 func (m *Machine) NetworkInterfaces() ([]*NetworkInterface, error) { 996 docs := []networkInterfaceDoc{} 997 err := m.st.networkInterfaces.Find(bson.D{{"machineid", m.doc.Id}}).All(&docs) 998 if err != nil { 999 return nil, err 1000 } 1001 ifaces := make([]*NetworkInterface, len(docs)) 1002 for i, doc := range docs { 1003 ifaces[i] = newNetworkInterface(m.st, &doc) 1004 } 1005 return ifaces, nil 1006 } 1007 1008 // AddNetworkInterface creates a new network interface with the given 1009 // args for this machine. The machine must be alive and not yet 1010 // provisioned, and there must be no other interface with the same MAC 1011 // address on the same network, or the same name on that machine for 1012 // this to succeed. If a network interface already exists, the 1013 // returned error satisfies errors.IsAlreadyExists. 1014 func (m *Machine) AddNetworkInterface(args NetworkInterfaceInfo) (iface *NetworkInterface, err error) { 1015 defer errors.Contextf(&err, "cannot add network interface %q to machine %q", args.InterfaceName, m.doc.Id) 1016 1017 if args.MACAddress == "" { 1018 return nil, fmt.Errorf("MAC address must be not empty") 1019 } 1020 if _, err = net.ParseMAC(args.MACAddress); err != nil { 1021 return nil, err 1022 } 1023 if args.InterfaceName == "" { 1024 return nil, fmt.Errorf("interface name must be not empty") 1025 } 1026 aliveAndNotProvisioned := append(isAliveDoc, bson.D{{"nonce", ""}}...) 
1027 doc := newNetworkInterfaceDoc(args) 1028 doc.MachineId = m.doc.Id 1029 doc.Id = bson.NewObjectId() 1030 ops := []txn.Op{{ 1031 C: m.st.networks.Name, 1032 Id: args.NetworkName, 1033 Assert: txn.DocExists, 1034 }, { 1035 C: m.st.machines.Name, 1036 Id: m.doc.Id, 1037 Assert: aliveAndNotProvisioned, 1038 }, { 1039 C: m.st.networkInterfaces.Name, 1040 Id: doc.Id, 1041 Assert: txn.DocMissing, 1042 Insert: doc, 1043 }} 1044 1045 err = m.st.runTransaction(ops) 1046 switch err { 1047 case txn.ErrAborted: 1048 if _, err = m.st.Network(args.NetworkName); err != nil { 1049 return nil, err 1050 } 1051 if err = m.Refresh(); err != nil { 1052 return nil, err 1053 } else if m.doc.Life != Alive { 1054 return nil, fmt.Errorf("machine is not alive") 1055 } else if m.doc.Nonce != "" { 1056 msg := "machine already provisioned: dynamic network interfaces not currently supported" 1057 return nil, fmt.Errorf(msg) 1058 } 1059 // Should never happen. 1060 logger.Errorf("unhandled assert while adding network interface doc %#v", doc) 1061 case nil: 1062 // We have a unique key restrictions on the following fields: 1063 // - InterfaceName, MachineId 1064 // - MACAddress, NetworkName 1065 // These will cause the insert to fail if there is another record 1066 // with the same combination of values in the table. 1067 // The txn logic does not report insertion errors, so we check 1068 // that the record has actually been inserted correctly before 1069 // reporting success. 
1070 if err = m.st.networkInterfaces.FindId(doc.Id).One(&doc); err == nil { 1071 return newNetworkInterface(m.st, doc), nil 1072 } 1073 sel := bson.D{{"interfacename", args.InterfaceName}, {"machineid", m.doc.Id}} 1074 if err = m.st.networkInterfaces.Find(sel).One(nil); err == nil { 1075 return nil, errors.AlreadyExistsf("%q on machine %q", args.InterfaceName, m.doc.Id) 1076 } 1077 sel = bson.D{{"macaddress", args.MACAddress}, {"networkname", args.NetworkName}} 1078 if err = m.st.networkInterfaces.Find(sel).One(nil); err == nil { 1079 return nil, errors.AlreadyExistsf("MAC address %q on network %q", args.MACAddress, args.NetworkName) 1080 } 1081 // Should never happen. 1082 logger.Errorf("unknown error while adding network interface doc %#v", doc) 1083 } 1084 return nil, err 1085 } 1086 1087 // CheckProvisioned returns true if the machine was provisioned with the given nonce. 1088 func (m *Machine) CheckProvisioned(nonce string) bool { 1089 return nonce == m.doc.Nonce && nonce != "" 1090 } 1091 1092 // String returns a unique description of this machine. 1093 func (m *Machine) String() string { 1094 return m.doc.Id 1095 } 1096 1097 // Placement returns the machine's Placement structure that should be used when 1098 // provisioning an instance for the machine. 1099 func (m *Machine) Placement() string { 1100 return m.doc.Placement 1101 } 1102 1103 // Constraints returns the exact constraints that should apply when provisioning 1104 // an instance for the machine. 1105 func (m *Machine) Constraints() (constraints.Value, error) { 1106 return readConstraints(m.st, m.globalKey()) 1107 } 1108 1109 // SetConstraints sets the exact constraints to apply when provisioning an 1110 // instance for the machine. It will fail if the machine is Dead, or if it 1111 // is already provisioned. 
1112 func (m *Machine) SetConstraints(cons constraints.Value) (err error) { 1113 defer errors.Maskf(&err, "cannot set constraints") 1114 unsupported, err := m.st.validateConstraints(cons) 1115 if len(unsupported) > 0 { 1116 logger.Warningf( 1117 "setting constraints on machine %q: unsupported constraints: %v", m.Id(), strings.Join(unsupported, ",")) 1118 } else if err != nil { 1119 return err 1120 } 1121 notSetYet := bson.D{{"nonce", ""}} 1122 ops := []txn.Op{ 1123 { 1124 C: m.st.machines.Name, 1125 Id: m.doc.Id, 1126 Assert: append(isAliveDoc, notSetYet...), 1127 }, 1128 setConstraintsOp(m.st, m.globalKey(), cons), 1129 } 1130 // 3 attempts is enough to push the ErrExcessiveContention case out of the 1131 // realm of plausibility: it implies local state indicating unprovisioned, 1132 // and remote state indicating provisioned (reasonable); but which changes 1133 // back to unprovisioned and then to provisioned again with *very* specific 1134 // timing in the course of this loop. 1135 for i := 0; i < 3; i++ { 1136 if m.doc.Life != Alive { 1137 return errNotAlive 1138 } 1139 if _, err := m.InstanceId(); err == nil { 1140 return fmt.Errorf("machine is already provisioned") 1141 } else if !IsNotProvisionedError(err) { 1142 return err 1143 } 1144 if err := m.st.runTransaction(ops); err != txn.ErrAborted { 1145 return err 1146 } 1147 if m, err = m.st.Machine(m.doc.Id); err != nil { 1148 return err 1149 } 1150 } 1151 return ErrExcessiveContention 1152 } 1153 1154 // Status returns the status of the machine. 1155 func (m *Machine) Status() (status params.Status, info string, data params.StatusData, err error) { 1156 doc, err := getStatus(m.st, m.globalKey()) 1157 if err != nil { 1158 return "", "", nil, err 1159 } 1160 status = doc.Status 1161 info = doc.StatusInfo 1162 data = doc.StatusData 1163 return 1164 } 1165 1166 // SetStatus sets the status of the machine. 
1167 func (m *Machine) SetStatus(status params.Status, info string, data params.StatusData) error { 1168 doc := statusDoc{ 1169 Status: status, 1170 StatusInfo: info, 1171 StatusData: data, 1172 } 1173 // If a machine is not yet provisioned, we allow its status 1174 // to be set back to pending (when a retry is to occur). 1175 _, err := m.InstanceId() 1176 allowPending := IsNotProvisionedError(err) 1177 if err := doc.validateSet(allowPending); err != nil { 1178 return err 1179 } 1180 ops := []txn.Op{{ 1181 C: m.st.machines.Name, 1182 Id: m.doc.Id, 1183 Assert: notDeadDoc, 1184 }, 1185 updateStatusOp(m.st, m.globalKey(), doc), 1186 } 1187 if err := m.st.runTransaction(ops); err != nil { 1188 return fmt.Errorf("cannot set status of machine %q: %v", m, onAbort(err, errNotAlive)) 1189 } 1190 return nil 1191 } 1192 1193 // Clean returns true if the machine does not have any deployed units or containers. 1194 func (m *Machine) Clean() bool { 1195 return m.doc.Clean 1196 } 1197 1198 // SupportedContainers returns any containers this machine is capable of hosting, and a bool 1199 // indicating if the supported containers have been determined or not. 1200 func (m *Machine) SupportedContainers() ([]instance.ContainerType, bool) { 1201 return m.doc.SupportedContainers, m.doc.SupportedContainersKnown 1202 } 1203 1204 // SupportsNoContainers records the fact that this machine doesn't support any containers. 1205 func (m *Machine) SupportsNoContainers() (err error) { 1206 if err = m.updateSupportedContainers([]instance.ContainerType{}); err != nil { 1207 return err 1208 } 1209 return m.markInvalidContainers() 1210 } 1211 1212 // SetSupportedContainers sets the list of containers supported by this machine. 
1213 func (m *Machine) SetSupportedContainers(containers []instance.ContainerType) (err error) { 1214 if len(containers) == 0 { 1215 return fmt.Errorf("at least one valid container type is required") 1216 } 1217 for _, container := range containers { 1218 if container == instance.NONE { 1219 return fmt.Errorf("%q is not a valid container type", container) 1220 } 1221 } 1222 if err = m.updateSupportedContainers(containers); err != nil { 1223 return err 1224 } 1225 return m.markInvalidContainers() 1226 } 1227 1228 func isSupportedContainer(container instance.ContainerType, supportedContainers []instance.ContainerType) bool { 1229 for _, supportedContainer := range supportedContainers { 1230 if supportedContainer == container { 1231 return true 1232 } 1233 } 1234 return false 1235 } 1236 1237 // updateSupportedContainers sets the supported containers on this host machine. 1238 func (m *Machine) updateSupportedContainers(supportedContainers []instance.ContainerType) (err error) { 1239 ops := []txn.Op{ 1240 { 1241 C: m.st.machines.Name, 1242 Id: m.doc.Id, 1243 Assert: notDeadDoc, 1244 Update: bson.D{ 1245 {"$set", bson.D{ 1246 {"supportedcontainers", supportedContainers}, 1247 {"supportedcontainersknown", true}, 1248 }}}, 1249 }, 1250 } 1251 if err = m.st.runTransaction(ops); err != nil { 1252 return fmt.Errorf("cannot update supported containers of machine %v: %v", m, onAbort(err, errDead)) 1253 } 1254 m.doc.SupportedContainers = supportedContainers 1255 m.doc.SupportedContainersKnown = true 1256 return nil 1257 } 1258 1259 // markInvalidContainers sets the status of any container belonging to this machine 1260 // as being in error if the container type is not supported. 
1261 func (m *Machine) markInvalidContainers() error { 1262 currentContainers, err := m.Containers() 1263 if err != nil { 1264 return err 1265 } 1266 for _, containerId := range currentContainers { 1267 if !isSupportedContainer(ContainerTypeFromId(containerId), m.doc.SupportedContainers) { 1268 container, err := m.st.Machine(containerId) 1269 if err != nil { 1270 logger.Errorf("loading container %v to mark as invalid: %v", containerId, err) 1271 continue 1272 } 1273 // There should never be a circumstance where an unsupported container is started. 1274 // Nonetheless, we check and log an error if such a situation arises. 1275 status, _, _, err := container.Status() 1276 if err != nil { 1277 logger.Errorf("finding status of container %v to mark as invalid: %v", containerId, err) 1278 continue 1279 } 1280 if status == params.StatusPending { 1281 containerType := ContainerTypeFromId(containerId) 1282 container.SetStatus( 1283 params.StatusError, "unsupported container", params.StatusData{"type": containerType}) 1284 } else { 1285 logger.Errorf("unsupported container %v has unexpected status %v", containerId, status) 1286 } 1287 } 1288 } 1289 return nil 1290 }