github.com/cloudbase/juju-core@v0.0.0-20140504232958-a7271ac7912f/state/machine.go (about)

     1  // Copyright 2012, 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package state
     5  
     6  import (
     7  	"fmt"
     8  	"strings"
     9  	"time"
    10  
    11  	"labix.org/v2/mgo"
    12  	"labix.org/v2/mgo/txn"
    13  
    14  	"launchpad.net/juju-core/constraints"
    15  	"launchpad.net/juju-core/errors"
    16  	"launchpad.net/juju-core/instance"
    17  	"launchpad.net/juju-core/names"
    18  	"launchpad.net/juju-core/state/api/params"
    19  	"launchpad.net/juju-core/state/presence"
    20  	"launchpad.net/juju-core/tools"
    21  	"launchpad.net/juju-core/utils"
    22  	"launchpad.net/juju-core/version"
    23  )
    24  
    25  // Machine represents the state of a machine.
// Machine represents the state of a machine.
type Machine struct {
	st  *State     // handle to the backing state (MongoDB collections, watchers)
	doc machineDoc // cached copy of the machine's document in the machines collection
	annotator      // embedded: provides annotation support keyed on the machine's global key
}
    31  
// MachineJob values define responsibilities that machines may be
// expected to fulfil.
type MachineJob int

const (
	// The zero value is deliberately skipped so that an uninitialized
	// MachineJob can never be mistaken for a valid job.
	_ MachineJob = iota
	JobHostUnits
	JobManageEnviron

	// Deprecated in 1.18.
	JobManageStateDeprecated
)
    44  
// jobNames maps each internal MachineJob value to its API (wire)
// representation; it is consulted by ToParams and MachineJobFromParams.
var jobNames = map[MachineJob]params.MachineJob{
	JobHostUnits:     params.JobHostUnits,
	JobManageEnviron: params.JobManageEnviron,

	// Deprecated in 1.18.
	JobManageStateDeprecated: params.JobManageStateDeprecated,
}
    52  
    53  // AllJobs returns all supported machine jobs.
    54  func AllJobs() []MachineJob {
    55  	return []MachineJob{JobHostUnits, JobManageEnviron}
    56  }
    57  
    58  // ToParams returns the job as params.MachineJob.
    59  func (job MachineJob) ToParams() params.MachineJob {
    60  	if paramsJob, ok := jobNames[job]; ok {
    61  		return paramsJob
    62  	}
    63  	return params.MachineJob(fmt.Sprintf("<unknown job %d>", int(job)))
    64  }
    65  
    66  // MachineJobFromParams returns the job corresponding to params.MachineJob.
    67  func MachineJobFromParams(job params.MachineJob) (MachineJob, error) {
    68  	for machineJob, paramJob := range jobNames {
    69  		if paramJob == job {
    70  			return machineJob, nil
    71  		}
    72  	}
    73  	return -1, fmt.Errorf("invalid machine job %q", job)
    74  }
    75  
// String returns the job's API name (via ToParams), satisfying
// fmt.Stringer.
func (job MachineJob) String() string {
	return string(job.ToParams())
}
    79  
// machineDoc represents the internal state of a machine in MongoDB.
// Note the correspondence with MachineInfo in state/api/params.
type machineDoc struct {
	Id string `bson:"_id"`
	// Nonce is the provisioning nonce; a "manual:" prefix marks a
	// manually provisioned machine (see IsManual).
	Nonce         string
	Series        string
	ContainerType string
	// Principals holds the names of the principal units assigned to
	// this machine.
	Principals []string
	Life       Life
	Tools      *tools.Tools `bson:",omitempty"`
	Jobs       []MachineJob
	// NoVote records the intent not to vote; HasVote records whether
	// the machine actually is a voting replica-set member.
	NoVote       bool
	HasVote      bool
	PasswordHash string
	Clean        bool
	// We store 2 different sets of addresses for the machine, obtained
	// from different sources.
	// Addresses is the set of addresses obtained by asking the provider.
	Addresses []address
	// MachineAddresses is the set of addresses obtained from the machine itself.
	MachineAddresses []address
	// The SupportedContainers attributes are used to advertise what containers this
	// machine is capable of hosting.
	SupportedContainersKnown bool
	SupportedContainers      []instance.ContainerType `bson:",omitempty"`
	// Deprecated. InstanceId, now lives on instanceData.
	// This attribute is retained so that data from existing machines can be read.
	// SCHEMACHANGE
	// TODO(wallyworld): remove this attribute when schema upgrades are possible.
	InstanceId instance.Id
}
   111  
   112  func newMachine(st *State, doc *machineDoc) *Machine {
   113  	machine := &Machine{
   114  		st:  st,
   115  		doc: *doc,
   116  	}
   117  	machine.annotator = annotator{
   118  		globalKey: machine.globalKey(),
   119  		tag:       machine.Tag(),
   120  		st:        st,
   121  	}
   122  	return machine
   123  }
   124  
// Id returns the machine id (the "_id" of its document in the
// machines collection).
func (m *Machine) Id() string {
	return m.doc.Id
}
   129  
// Series returns the operating system series running on the machine,
// as recorded in the machine document.
func (m *Machine) Series() string {
	return m.doc.Series
}
   134  
// ContainerType returns the type of container hosting this machine,
// converting the raw string stored in the document.
func (m *Machine) ContainerType() instance.ContainerType {
	return instance.ContainerType(m.doc.ContainerType)
}
   139  
   140  // machineGlobalKey returns the global database key for the identified machine.
   141  func machineGlobalKey(id string) string {
   142  	return "m#" + id
   143  }
   144  
// globalKey returns the global database key for the machine,
// derived from its id via machineGlobalKey.
func (m *Machine) globalKey() string {
	return machineGlobalKey(m.doc.Id)
}
   149  
// instanceData holds attributes relevant to a provisioned machine.
// It shares its document id with the corresponding machineDoc, and its
// hardware fields are pointers so that "unknown" (nil) is distinct
// from a zero value.
type instanceData struct {
	Id         string      `bson:"_id"`
	InstanceId instance.Id `bson:"instanceid"`
	Status     string      `bson:"status,omitempty"`
	Arch       *string     `bson:"arch,omitempty"`
	Mem        *uint64     `bson:"mem,omitempty"`
	RootDisk   *uint64     `bson:"rootdisk,omitempty"`
	CpuCores   *uint64     `bson:"cpucores,omitempty"`
	CpuPower   *uint64     `bson:"cpupower,omitempty"`
	Tags       *[]string   `bson:"tags,omitempty"`
}
   162  
   163  // TODO(wallyworld): move this method to a service.
   164  func (m *Machine) HardwareCharacteristics() (*instance.HardwareCharacteristics, error) {
   165  	hc := &instance.HardwareCharacteristics{}
   166  	instData, err := getInstanceData(m.st, m.Id())
   167  	if err != nil {
   168  		return nil, err
   169  	}
   170  	hc.Arch = instData.Arch
   171  	hc.Mem = instData.Mem
   172  	hc.RootDisk = instData.RootDisk
   173  	hc.CpuCores = instData.CpuCores
   174  	hc.CpuPower = instData.CpuPower
   175  	hc.Tags = instData.Tags
   176  	return hc, nil
   177  }
   178  
   179  func getInstanceData(st *State, id string) (instanceData, error) {
   180  	var instData instanceData
   181  	err := st.instanceData.FindId(id).One(&instData)
   182  	if err == mgo.ErrNotFound {
   183  		return instanceData{}, errors.NotFoundf("instance data for machine %v", id)
   184  	}
   185  	if err != nil {
   186  		return instanceData{}, fmt.Errorf("cannot get instance data for machine %v: %v", id, err)
   187  	}
   188  	return instData, nil
   189  }
   190  
// Tag returns a name identifying the machine that is safe to use
// as a file name.  The returned name will be different from other
// Tag values returned by any other entities from the same state.
func (m *Machine) Tag() string {
	return names.MachineTag(m.Id())
}
   197  
// Life returns whether the machine is Alive, Dying or Dead,
// per the cached document.
func (m *Machine) Life() Life {
	return m.doc.Life
}
   202  
// Jobs returns the responsibilities that must be fulfilled by m's agent.
// Note: the underlying slice is shared with the cached document;
// callers should not mutate it.
func (m *Machine) Jobs() []MachineJob {
	return m.doc.Jobs
}
   207  
   208  // WantsVote reports whether the machine is a state server
   209  // that wants to take part in peer voting.
   210  func (m *Machine) WantsVote() bool {
   211  	return hasJob(m.doc.Jobs, JobManageEnviron) && !m.doc.NoVote
   212  }
   213  
// HasVote reports whether that machine is currently a voting
// member of the replica set.
func (m *Machine) HasVote() bool {
	return m.doc.HasVote
}
   219  
// SetHasVote sets whether the machine is currently a voting
// member of the replica set. It should only be called
// from the worker that maintains the replica set.
func (m *Machine) SetHasVote(hasVote bool) error {
	ops := []txn.Op{{
		C:      m.st.machines.Name,
		Id:     m.doc.Id,
		Assert: notDeadDoc,
		Update: D{{"$set", D{{"hasvote", hasVote}}}},
	}}
	if err := m.st.runTransaction(ops); err != nil {
		// An aborted transaction means the not-dead assertion failed,
		// so report the machine as dead.
		return fmt.Errorf("cannot set HasVote of machine %v: %v", m, onAbort(err, errDead))
	}
	// Keep the cached document in sync with the database.
	m.doc.HasVote = hasVote
	return nil
}
   236  
// IsManager returns true if the machine has JobManageEnviron.
func (m *Machine) IsManager() bool {
	return hasJob(m.doc.Jobs, JobManageEnviron)
}
   241  
   242  // IsManual returns true if the machine was manually provisioned.
   243  func (m *Machine) IsManual() (bool, error) {
   244  	// Apart from the bootstrap machine, manually provisioned
   245  	// machines have a nonce prefixed with "manual:". This is
   246  	// unique to manual provisioning.
   247  	if strings.HasPrefix(m.doc.Nonce, "manual:") {
   248  		return true, nil
   249  	}
   250  	// The bootstrap machine uses BootstrapNonce, so in that
   251  	// case we need to check if its provider type is "manual".
   252  	// We also check for "null", which is an alias for manual.
   253  	if m.doc.Id == "0" {
   254  		cfg, err := m.st.EnvironConfig()
   255  		if err != nil {
   256  			return false, err
   257  		}
   258  		t := cfg.Type()
   259  		return t == "null" || t == "manual", nil
   260  	}
   261  	return false, nil
   262  }
   263  
   264  // AgentTools returns the tools that the agent is currently running.
   265  // It returns an error that satisfies IsNotFound if the tools have not yet been set.
   266  func (m *Machine) AgentTools() (*tools.Tools, error) {
   267  	if m.doc.Tools == nil {
   268  		return nil, errors.NotFoundf("agent tools for machine %v", m)
   269  	}
   270  	tools := *m.doc.Tools
   271  	return &tools, nil
   272  }
   273  
   274  // checkVersionValidity checks whether the given version is suitable
   275  // for passing to SetAgentVersion.
   276  func checkVersionValidity(v version.Binary) error {
   277  	if v.Series == "" || v.Arch == "" {
   278  		return fmt.Errorf("empty series or arch")
   279  	}
   280  	return nil
   281  }
   282  
// SetAgentVersion sets the version of juju that the agent is
// currently running. The version must carry a non-empty series and
// arch (see checkVersionValidity).
func (m *Machine) SetAgentVersion(v version.Binary) (err error) {
	defer utils.ErrorContextf(&err, "cannot set agent version for machine %v", m)
	if err = checkVersionValidity(v); err != nil {
		return err
	}
	tools := &tools.Tools{Version: v}
	ops := []txn.Op{{
		C:      m.st.machines.Name,
		Id:     m.doc.Id,
		Assert: notDeadDoc,
		Update: D{{"$set", D{{"tools", tools}}}},
	}}
	if err := m.st.runTransaction(ops); err != nil {
		// An aborted transaction means the not-dead assertion failed.
		return onAbort(err, errDead)
	}
	// Keep the cached document in sync with the database.
	m.doc.Tools = tools
	return nil
}
   303  
// SetMongoPassword sets the password the agent responsible for the machine
// should use to communicate with the state servers.  Previous passwords
// are invalidated.
func (m *Machine) SetMongoPassword(password string) error {
	return m.st.setMongoPassword(m.Tag(), password)
}
   310  
   311  // SetPassword sets the password for the machine's agent.
   312  func (m *Machine) SetPassword(password string) error {
   313  	if len(password) < utils.MinAgentPasswordLength {
   314  		return fmt.Errorf("password is only %d bytes long, and is not a valid Agent password", len(password))
   315  	}
   316  	return m.setPasswordHash(utils.AgentPasswordHash(password))
   317  }
   318  
// setPasswordHash sets the underlying password hash in the database directly
// to the value supplied. This is split out from SetPassword to allow direct
// manipulation in tests (to check for backwards compatibility).
func (m *Machine) setPasswordHash(passwordHash string) error {
	ops := []txn.Op{{
		C:      m.st.machines.Name,
		Id:     m.doc.Id,
		Assert: notDeadDoc,
		Update: D{{"$set", D{{"passwordhash", passwordHash}}}},
	}}
	if err := m.st.runTransaction(ops); err != nil {
		// An aborted transaction means the not-dead assertion failed.
		return fmt.Errorf("cannot set password of machine %v: %v", m, onAbort(err, errDead))
	}
	// Keep the cached document in sync with the database.
	m.doc.PasswordHash = passwordHash
	return nil
}
   335  
// getPasswordHash returns the underlying PasswordHash stored in the
// database. Used by the test suite to check that the PasswordHash gets
// properly updated to new values when compatibility mode is detected.
func (m *Machine) getPasswordHash() string {
	return m.doc.PasswordHash
}
   342  
// PasswordValid returns whether the given password is valid
// for the given machine.
func (m *Machine) PasswordValid(password string) bool {
	agentHash := utils.AgentPasswordHash(password)
	if agentHash == m.doc.PasswordHash {
		return true
	}
	// In Juju 1.16 and older we used the slower password hash for unit
	// agents. So check to see if the supplied password matches the old
	// path, and if so, update it to the new mechanism.
	// We deliberately ignore any error in setting the password, as we'll
	// just try again next time.
	if utils.UserPasswordHash(password, utils.CompatSalt) == m.doc.PasswordHash {
		logger.Debugf("%s logged in with old password hash, changing to AgentPasswordHash",
			m.Tag())
		m.setPasswordHash(agentHash)
		return true
	}
	return false
}
   363  
// Destroy sets the machine lifecycle to Dying if it is Alive. It does
// nothing otherwise. Destroy will fail if the machine has principal
// units assigned, or if the machine has JobManageEnviron.
// If the machine has assigned units, Destroy will return
// a HasAssignedUnitsError; if it hosts containers, a
// HasContainersError (see advanceLifecycle).
func (m *Machine) Destroy() error {
	return m.advanceLifecycle(Dying)
}
   372  
// ForceDestroy queues the machine for complete removal, including the
// destruction of all units and containers on the machine.
func (m *Machine) ForceDestroy() error {
	if !m.IsManager() {
		ops := []txn.Op{{
			C:  m.st.machines.Name,
			Id: m.doc.Id,
			// Re-assert inside the transaction that the machine is not
			// a manager, in case the document changed after IsManager.
			Assert: D{{"jobs", D{{"$nin", []MachineJob{JobManageEnviron}}}}},
		}, m.st.newCleanupOp("machine", m.doc.Id)}
		// On success this returns nil; only an aborted transaction
		// (assertion failed) falls through to the error below.
		if err := m.st.runTransaction(ops); err != txn.ErrAborted {
			return err
		}
	}
	// Reached when the machine is a manager, or when the transaction
	// aborted because it became one.
	return fmt.Errorf("machine %s is required by the environment", m.doc.Id)
}
   388  
// EnsureDead sets the machine lifecycle to Dead if it is Alive or Dying.
// It does nothing otherwise. EnsureDead will fail if the machine has
// principal units assigned, or if the machine has JobManageEnviron.
// If the machine has assigned units, EnsureDead will return
// a HasAssignedUnitsError; if it hosts containers, a
// HasContainersError (see advanceLifecycle).
func (m *Machine) EnsureDead() error {
	return m.advanceLifecycle(Dead)
}
   397  
   398  type HasAssignedUnitsError struct {
   399  	MachineId string
   400  	UnitNames []string
   401  }
   402  
   403  func (e *HasAssignedUnitsError) Error() string {
   404  	return fmt.Sprintf("machine %s has unit %q assigned", e.MachineId, e.UnitNames[0])
   405  }
   406  
   407  func IsHasAssignedUnitsError(err error) bool {
   408  	_, ok := err.(*HasAssignedUnitsError)
   409  	return ok
   410  }
   411  
   412  // Containers returns the container ids belonging to a parent machine.
   413  // TODO(wallyworld): move this method to a service
   414  func (m *Machine) Containers() ([]string, error) {
   415  	var mc machineContainers
   416  	err := m.st.containerRefs.FindId(m.Id()).One(&mc)
   417  	if err == nil {
   418  		return mc.Children, nil
   419  	}
   420  	if err == mgo.ErrNotFound {
   421  		return nil, errors.NotFoundf("container info for machine %v", m.Id())
   422  	}
   423  	return nil, err
   424  }
   425  
   426  // ParentId returns the Id of the host machine if this machine is a container.
   427  func (m *Machine) ParentId() (string, bool) {
   428  	parentId := ParentId(m.Id())
   429  	return parentId, parentId != ""
   430  }
   431  
   432  type HasContainersError struct {
   433  	MachineId    string
   434  	ContainerIds []string
   435  }
   436  
   437  func (e *HasContainersError) Error() string {
   438  	return fmt.Sprintf("machine %s is hosting containers %q", e.MachineId, strings.Join(e.ContainerIds, ","))
   439  }
   440  
   441  func IsHasContainersError(err error) bool {
   442  	_, ok := err.(*HasContainersError)
   443  	return ok
   444  }
   445  
// advanceLifecycle ensures that the machine's lifecycle is no earlier
// than the supplied value. If the machine already has that lifecycle
// value, or a later one, no changes will be made to remote state. If
// the machine has any responsibilities that preclude a valid change in
// lifecycle, it will return an error.
func (original *Machine) advanceLifecycle(life Life) (err error) {
	// A machine still hosting containers may never advance its
	// lifecycle; the containers must be dealt with first.
	containers, err := original.Containers()
	if err != nil {
		return err
	}
	if len(containers) > 0 {
		return &HasContainersError{
			MachineId:    original.doc.Id,
			ContainerIds: containers,
		}
	}
	// m is the working copy used across retries; original is only
	// mutated by the deferred function below, on success.
	m := original
	defer func() {
		if err == nil {
			// The machine's lifecycle is known to have advanced; it may be
			// known to have already advanced further than requested, in
			// which case we set the latest known valid value.
			if m == nil {
				// The document vanished during a retry: the machine was
				// removed, which implies it reached Dead.
				life = Dead
			} else if m.doc.Life > life {
				life = m.doc.Life
			}
			original.doc.Life = life
		}
	}()
	// op sets the new lifecycle; its Assert field is filled in per
	// attempt below from advanceAsserts plus a liveness condition.
	op := txn.Op{
		C:      m.st.machines.Name,
		Id:     m.doc.Id,
		Update: D{{"$set", D{{"life", life}}}},
	}
	// advanceAsserts are the preconditions for any lifecycle advance:
	// not an environment manager, no principal units assigned, and not
	// a voting replica-set member.
	advanceAsserts := D{
		{"jobs", D{{"$nin", []MachineJob{JobManageEnviron}}}},
		{"$or", []D{
			{{"principals", D{{"$size", 0}}}},
			{{"principals", D{{"$exists", false}}}},
		}},
		{"hasvote", D{{"$ne", true}}},
	}
	// 3 attempts: one with original data, one with refreshed data, and a final
	// one intended to determine the cause of failure of the preceding attempt.
	for i := 0; i < 3; i++ {
		// If the transaction was aborted, grab a fresh copy of the machine data.
		// We don't write to original, because the expectation is that state-
		// changing methods only set the requested change on the receiver; a case
		// could perhaps be made that this is not a helpful convention in the
		// context of the new state API, but we maintain consistency in the
		// face of uncertainty.
		if i != 0 {
			if m, err = m.st.Machine(m.doc.Id); errors.IsNotFoundError(err) {
				return nil
			} else if err != nil {
				return err
			}
		}
		// Check that the life change is sane, and collect the assertions
		// necessary to determine that it remains so.
		switch life {
		case Dying:
			if m.doc.Life != Alive {
				return nil
			}
			op.Assert = append(advanceAsserts, isAliveDoc...)
		case Dead:
			if m.doc.Life == Dead {
				return nil
			}
			op.Assert = append(advanceAsserts, notDeadDoc...)
		default:
			panic(fmt.Errorf("cannot advance lifecycle to %v", life))
		}
		// Check that the machine does not have any responsibilities that
		// prevent a lifecycle change.
		if hasJob(m.doc.Jobs, JobManageEnviron) {
			// (NOTE: When we enable multiple JobManageEnviron machines,
			// this restriction will be lifted, but we will assert that the
			// machine is not voting)
			return fmt.Errorf("machine %s is required by the environment", m.doc.Id)
		}
		if m.doc.HasVote {
			return fmt.Errorf("machine %s is a voting replica set member", m.doc.Id)
		}
		if len(m.doc.Principals) != 0 {
			return &HasAssignedUnitsError{
				MachineId: m.doc.Id,
				UnitNames: m.doc.Principals,
			}
		}
		// Run the transaction...
		if err := m.st.runTransaction([]txn.Op{op}); err != txn.ErrAborted {
			return err
		}
		// ...and retry on abort.
	}
	// In very rare circumstances, the final iteration above will have determined
	// no cause of failure, and attempted a final transaction: if this also failed,
	// we can be sure that the machine document is changing very fast, in a somewhat
	// surprising fashion, and that it is sensible to back off for now.
	return fmt.Errorf("machine %s cannot advance lifecycle: %v", m, ErrExcessiveContention)
}
   551  
// Remove removes the machine from state. It will fail if the machine is not
// Dead.
func (m *Machine) Remove() (err error) {
	defer utils.ErrorContextf(&err, "cannot remove machine %s", m.doc.Id)
	if m.doc.Life != Dead {
		return fmt.Errorf("machine is not dead")
	}
	// Remove the machine document along with its associated documents:
	// instance data, status, constraints, annotations and container refs.
	ops := []txn.Op{
		{
			C:      m.st.machines.Name,
			Id:     m.doc.Id,
			Assert: txn.DocExists,
			Remove: true,
		},
		{
			C:      m.st.instanceData.Name,
			Id:     m.doc.Id,
			Remove: true,
		},
		removeStatusOp(m.st, m.globalKey()),
		removeConstraintsOp(m.st, m.globalKey()),
		annotationRemoveOp(m.st, m.globalKey()),
	}
	ops = append(ops, removeContainerRefOps(m.st, m.Id())...)
	// The only abort conditions in play indicate that the machine has already
	// been removed.
	return onAbort(m.st.runTransaction(ops), nil)
}
   580  
   581  // Refresh refreshes the contents of the machine from the underlying
   582  // state. It returns an error that satisfies IsNotFound if the machine has
   583  // been removed.
   584  func (m *Machine) Refresh() error {
   585  	doc := machineDoc{}
   586  	err := m.st.machines.FindId(m.doc.Id).One(&doc)
   587  	if err == mgo.ErrNotFound {
   588  		return errors.NotFoundf("machine %v", m)
   589  	}
   590  	if err != nil {
   591  		return fmt.Errorf("cannot refresh machine %v: %v", m, err)
   592  	}
   593  	m.doc = doc
   594  	return nil
   595  }
   596  
// AgentAlive returns whether the respective remote agent is alive,
// as reported by the presence watcher for this machine's global key.
func (m *Machine) AgentAlive() (bool, error) {
	return m.st.pwatcher.Alive(m.globalKey())
}
   601  
// WaitAgentAlive blocks until the respective agent is alive.
func (m *Machine) WaitAgentAlive(timeout time.Duration) (err error) {
	defer utils.ErrorContextf(&err, "waiting for agent of machine %v", m)
	ch := make(chan presence.Change)
	m.st.pwatcher.Watch(m.globalKey(), ch)
	defer m.st.pwatcher.Unwatch(m.globalKey(), ch)
	// Allow at most two presence events before giving up — presumably
	// an initial "current state" report plus one change; TODO confirm
	// against the presence watcher's contract.
	for i := 0; i < 2; i++ {
		select {
		case change := <-ch:
			if change.Alive {
				return nil
			}
		case <-time.After(timeout):
			// NOTE(review): the timer restarts per event, so the total
			// wait can be up to twice the supplied timeout.
			return fmt.Errorf("still not alive after timeout")
		case <-m.st.pwatcher.Dead():
			return m.st.pwatcher.Err()
		}
	}
	// Two consecutive non-alive reports violate the watcher's expected
	// behavior, so treat it as a programming error.
	panic(fmt.Sprintf("presence reported dead status twice in a row for machine %v", m))
}
   622  
   623  // SetAgentAlive signals that the agent for machine m is alive.
   624  // It returns the started pinger.
   625  func (m *Machine) SetAgentAlive() (*presence.Pinger, error) {
   626  	p := presence.NewPinger(m.st.presence, m.globalKey())
   627  	err := p.Start()
   628  	if err != nil {
   629  		return nil, err
   630  	}
   631  	return p, nil
   632  }
   633  
// InstanceId returns the provider specific instance id for this
// machine, or a NotProvisionedError, if not set.
func (m *Machine) InstanceId() (instance.Id, error) {
	// SCHEMACHANGE
	// TODO(wallyworld) - remove this backward compatibility code when schema upgrades are possible
	// (we first check for InstanceId stored on the machineDoc)
	if m.doc.InstanceId != "" {
		return m.doc.InstanceId, nil
	}
	instData, err := getInstanceData(m.st, m.Id())
	// Both a missing instanceData document and an empty stored id mean
	// the machine has not been provisioned yet.
	if (err == nil && instData.InstanceId == "") || errors.IsNotFoundError(err) {
		err = NotProvisionedError(m.Id())
	}
	if err != nil {
		return "", err
	}
	return instData.InstanceId, nil
}
   652  
// InstanceStatus returns the provider specific instance status for this machine,
// or a NotProvisionedError if instance is not yet provisioned.
func (m *Machine) InstanceStatus() (string, error) {
	// SCHEMACHANGE
	// InstanceId may not be stored in the instanceData doc, so we
	// get it using an API on machine which knows to look in the old
	// place if necessary.
	instId, err := m.InstanceId()
	if err != nil {
		return "", err
	}
	instData, err := getInstanceData(m.st, m.Id())
	// Treat a missing instanceData document (or an empty instance id)
	// as "not provisioned".
	if (err == nil && instId == "") || errors.IsNotFoundError(err) {
		err = NotProvisionedError(m.Id())
	}
	if err != nil {
		return "", err
	}
	return instData.Status, nil
}
   673  
// SetInstanceStatus sets the provider specific instance status for a machine.
// It returns a NotProvisionedError if no instanceData document exists.
func (m *Machine) SetInstanceStatus(status string) (err error) {
	defer utils.ErrorContextf(&err, "cannot set instance status for machine %q", m)

	// SCHEMACHANGE - we can't do this yet until the schema is updated
	// so just do a txn.DocExists for now.
	// provisioned := D{{"instanceid", D{{"$ne", ""}}}}
	ops := []txn.Op{
		{
			C:      m.st.instanceData.Name,
			Id:     m.doc.Id,
			Assert: txn.DocExists,
			Update: D{{"$set", D{{"status", status}}}},
		},
	}

	if err = m.st.runTransaction(ops); err == nil {
		return nil
	} else if err != txn.ErrAborted {
		return err
	}
	// An aborted transaction means the instanceData document is missing,
	// i.e. the machine has not been provisioned.
	return NotProvisionedError(m.Id())
}
   697  
// Units returns all the units that have been assigned to the machine:
// the principal units assigned directly, plus each principal's
// subordinate units.
func (m *Machine) Units() (units []*Unit, err error) {
	defer utils.ErrorContextf(&err, "cannot get units assigned to machine %v", m)
	pudocs := []unitDoc{}
	// First collect the principal units assigned to this machine.
	err = m.st.units.Find(D{{"machineid", m.doc.Id}}).All(&pudocs)
	if err != nil {
		return nil, err
	}
	for _, pudoc := range pudocs {
		units = append(units, newUnit(m.st, &pudoc))
		// Then append the units whose principal is this unit.
		docs := []unitDoc{}
		err = m.st.units.Find(D{{"principal", pudoc.Name}}).All(&docs)
		if err != nil {
			return nil, err
		}
		for _, doc := range docs {
			units = append(units, newUnit(m.st, &doc))
		}
	}
	return units, nil
}
   719  
// SetProvisioned sets the provider specific machine id, nonce and also metadata for
// this machine. Once set, the instance id cannot be changed.
//
// When provisioning an instance, a nonce should be created and passed
// when starting it, before adding the machine to the state. This means
// that if the provisioner crashes (or its connection to the state is
// lost) after starting the instance, we can be sure that only a single
// instance will be able to act for that machine.
func (m *Machine) SetProvisioned(id instance.Id, nonce string, characteristics *instance.HardwareCharacteristics) (err error) {
	defer utils.ErrorContextf(&err, "cannot set instance data for machine %q", m)

	if id == "" || nonce == "" {
		return fmt.Errorf("instance id and nonce cannot be empty")
	}

	// A nil characteristics argument is treated as "no recorded hardware".
	if characteristics == nil {
		characteristics = &instance.HardwareCharacteristics{}
	}
	instData := &instanceData{
		Id:         m.doc.Id,
		InstanceId: id,
		Arch:       characteristics.Arch,
		Mem:        characteristics.Mem,
		RootDisk:   characteristics.RootDisk,
		CpuCores:   characteristics.CpuCores,
		CpuPower:   characteristics.CpuPower,
		Tags:       characteristics.Tags,
	}
	// SCHEMACHANGE
	// TODO(wallyworld) - do not check instanceId on machineDoc after schema is upgraded
	notSetYet := D{{"instanceid", ""}, {"nonce", ""}}
	ops := []txn.Op{
		{
			C:      m.st.machines.Name,
			Id:     m.doc.Id,
			Assert: append(isAliveDoc, notSetYet...),
			Update: D{{"$set", D{{"instanceid", id}, {"nonce", nonce}}}},
		}, {
			C:      m.st.instanceData.Name,
			Id:     m.doc.Id,
			Assert: txn.DocMissing,
			Insert: instData,
		},
	}

	if err = m.st.runTransaction(ops); err == nil {
		// Keep the cached document in sync with the database.
		m.doc.Nonce = nonce
		// SCHEMACHANGE
		// TODO(wallyworld) - remove this backward compatibility code when schema upgrades are possible
		// (InstanceId is stored on the instanceData document but we duplicate the value on the machineDoc.
		m.doc.InstanceId = id
		return nil
	} else if err != txn.ErrAborted {
		return err
	} else if alive, err := isAlive(m.st.machines, m.doc.Id); err != nil {
		return err
	} else if !alive {
		return errNotAlive
	}
	// The transaction aborted while the machine is still alive, so the
	// remaining explanation is that the provisioning data was already set.
	return fmt.Errorf("already set")
}
   781  
   782  // notProvisionedError records an error when a machine is not provisioned.
   783  type notProvisionedError struct {
   784  	machineId string
   785  }
   786  
   787  func NotProvisionedError(machineId string) error {
   788  	return &notProvisionedError{machineId}
   789  }
   790  
   791  func (e *notProvisionedError) Error() string {
   792  	return fmt.Sprintf("machine %v is not provisioned", e.machineId)
   793  }
   794  
   795  // IsNotProvisionedError returns true if err is a notProvisionedError.
   796  func IsNotProvisionedError(err error) bool {
   797  	_, ok := err.(*notProvisionedError)
   798  	return ok
   799  }
   800  
   801  // Addresses returns any hostnames and ips associated with a machine,
   802  // determined both by the machine itself, and by asking the provider.
   803  //
   804  // The addresses returned by the provider shadow any of the addresses
   805  // that the machine reported with the same address value.
   806  func (m *Machine) Addresses() (addresses []instance.Address) {
   807  	merged := make(map[string]instance.Address)
   808  	for _, address := range m.doc.MachineAddresses {
   809  		merged[address.Value] = address.InstanceAddress()
   810  	}
   811  	for _, address := range m.doc.Addresses {
   812  		merged[address.Value] = address.InstanceAddress()
   813  	}
   814  	for _, address := range merged {
   815  		addresses = append(addresses, address)
   816  	}
   817  	return
   818  }
   819  
   820  // SetAddresses records any addresses related to the machine, sourced
   821  // by asking the provider.
   822  func (m *Machine) SetAddresses(addresses []instance.Address) (err error) {
   823  	stateAddresses := instanceAddressesToAddresses(addresses)
   824  	ops := []txn.Op{
   825  		{
   826  			C:      m.st.machines.Name,
   827  			Id:     m.doc.Id,
   828  			Assert: notDeadDoc,
   829  			Update: D{{"$set", D{{"addresses", stateAddresses}}}},
   830  		},
   831  	}
   832  
   833  	if err = m.st.runTransaction(ops); err != nil {
   834  		return fmt.Errorf("cannot set addresses of machine %v: %v", m, onAbort(err, errDead))
   835  	}
   836  	m.doc.Addresses = stateAddresses
   837  	return nil
   838  }
   839  
   840  // MachineAddresses returns any hostnames and ips associated with a machine,
   841  // determined by asking the machine itself.
   842  func (m *Machine) MachineAddresses() (addresses []instance.Address) {
   843  	for _, address := range m.doc.MachineAddresses {
   844  		addresses = append(addresses, address.InstanceAddress())
   845  	}
   846  	return
   847  }
   848  
   849  // SetMachineAddresses records any addresses related to the machine, sourced
   850  // by asking the machine.
   851  func (m *Machine) SetMachineAddresses(addresses []instance.Address) (err error) {
   852  	stateAddresses := instanceAddressesToAddresses(addresses)
   853  	ops := []txn.Op{
   854  		{
   855  			C:      m.st.machines.Name,
   856  			Id:     m.doc.Id,
   857  			Assert: notDeadDoc,
   858  			Update: D{{"$set", D{{"machineaddresses", stateAddresses}}}},
   859  		},
   860  	}
   861  
   862  	if err = m.st.runTransaction(ops); err != nil {
   863  		return fmt.Errorf("cannot set machine addresses of machine %v: %v", m, onAbort(err, errDead))
   864  	}
   865  	m.doc.MachineAddresses = stateAddresses
   866  	return nil
   867  }
   868  
   869  // CheckProvisioned returns true if the machine was provisioned with the given nonce.
   870  func (m *Machine) CheckProvisioned(nonce string) bool {
   871  	return nonce == m.doc.Nonce && nonce != ""
   872  }
   873  
// String returns a unique description of this machine.
func (m *Machine) String() string {
	// The machine id serves as the string representation; it is the
	// document id used throughout this file to identify the machine.
	return m.doc.Id
}
   878  
// Constraints returns the exact constraints that should apply when provisioning
// an instance for the machine.
func (m *Machine) Constraints() (constraints.Value, error) {
	// Constraints are stored separately, keyed by the machine's global
	// key; readConstraints performs the lookup.
	return readConstraints(m.st, m.globalKey())
}
   884  
// SetConstraints sets the exact constraints to apply when provisioning an
// instance for the machine. It will fail if the machine is Dead, or if it
// is already provisioned.
func (m *Machine) SetConstraints(cons constraints.Value) (err error) {
	// The deferred wrapper annotates the named return value, including
	// errors returned from shadowed err variables inside the loop.
	defer utils.ErrorContextf(&err, "cannot set constraints")
	// A machine with an empty nonce has not been provisioned yet.
	notSetYet := D{{"nonce", ""}}
	ops := []txn.Op{
		{
			C:      m.st.machines.Name,
			Id:     m.doc.Id,
			Assert: append(isAliveDoc, notSetYet...),
		},
		setConstraintsOp(m.st, m.globalKey(), cons),
	}
	// 3 attempts is enough to push the ErrExcessiveContention case out of the
	// realm of plausibility: it implies local state indicating unprovisioned,
	// and remote state indicating provisioned (reasonable); but which changes
	// back to unprovisioned and then to provisioned again with *very* specific
	// timing in the course of this loop.
	for i := 0; i < 3; i++ {
		if m.doc.Life != Alive {
			return errNotAlive
		}
		// Refuse early if local state already shows an instance id;
		// only the "not provisioned" error lets us proceed.
		if _, err := m.InstanceId(); err == nil {
			return fmt.Errorf("machine is already provisioned")
		} else if !IsNotProvisionedError(err) {
			return err
		}
		// A non-aborted result (including nil on success) is final;
		// ErrAborted means an assertion failed, so refresh and retry.
		if err := m.st.runTransaction(ops); err != txn.ErrAborted {
			return err
		}
		// Reload the machine to re-check liveness and provisioning
		// against current remote state on the next iteration.
		if m, err = m.st.Machine(m.doc.Id); err != nil {
			return err
		}
	}
	return ErrExcessiveContention
}
   922  
   923  // Status returns the status of the machine.
   924  func (m *Machine) Status() (status params.Status, info string, data params.StatusData, err error) {
   925  	doc, err := getStatus(m.st, m.globalKey())
   926  	if err != nil {
   927  		return "", "", nil, err
   928  	}
   929  	status = doc.Status
   930  	info = doc.StatusInfo
   931  	data = doc.StatusData
   932  	return
   933  }
   934  
   935  // SetStatus sets the status of the machine.
   936  func (m *Machine) SetStatus(status params.Status, info string, data params.StatusData) error {
   937  	doc := statusDoc{
   938  		Status:     status,
   939  		StatusInfo: info,
   940  		StatusData: data,
   941  	}
   942  	if err := doc.validateSet(); err != nil {
   943  		return err
   944  	}
   945  	ops := []txn.Op{{
   946  		C:      m.st.machines.Name,
   947  		Id:     m.doc.Id,
   948  		Assert: notDeadDoc,
   949  	},
   950  		updateStatusOp(m.st, m.globalKey(), doc),
   951  	}
   952  	if err := m.st.runTransaction(ops); err != nil {
   953  		return fmt.Errorf("cannot set status of machine %q: %v", m, onAbort(err, errNotAlive))
   954  	}
   955  	return nil
   956  }
   957  
// Clean returns true if the machine does not have any deployed units or containers.
func (m *Machine) Clean() bool {
	// Reflects the flag cached on the machine document.
	return m.doc.Clean
}
   962  
// SupportedContainers returns any containers this machine is capable of hosting, and a bool
// indicating if the supported containers have been determined or not.
func (m *Machine) SupportedContainers() ([]instance.ContainerType, bool) {
	// The known flag becomes true once SetSupportedContainers or
	// SupportsNoContainers has recorded a (possibly empty) list.
	return m.doc.SupportedContainers, m.doc.SupportedContainersKnown
}
   968  
   969  // SupportsNoContainers records the fact that this machine doesn't support any containers.
   970  func (m *Machine) SupportsNoContainers() (err error) {
   971  	if err = m.updateSupportedContainers([]instance.ContainerType{}); err != nil {
   972  		return err
   973  	}
   974  	return m.markInvalidContainers()
   975  }
   976  
   977  // SetSupportedContainers sets the list of containers supported by this machine.
   978  func (m *Machine) SetSupportedContainers(containers []instance.ContainerType) (err error) {
   979  	if len(containers) == 0 {
   980  		return fmt.Errorf("at least one valid container type is required")
   981  	}
   982  	for _, container := range containers {
   983  		if container == instance.NONE {
   984  			return fmt.Errorf("%q is not a valid container type", container)
   985  		}
   986  	}
   987  	if err = m.updateSupportedContainers(containers); err != nil {
   988  		return err
   989  	}
   990  	return m.markInvalidContainers()
   991  }
   992  
   993  func isSupportedContainer(container instance.ContainerType, supportedContainers []instance.ContainerType) bool {
   994  	for _, supportedContainer := range supportedContainers {
   995  		if supportedContainer == container {
   996  			return true
   997  		}
   998  	}
   999  	return false
  1000  }
  1001  
  1002  // updateSupportedContainers sets the supported containers on this host machine.
  1003  func (m *Machine) updateSupportedContainers(supportedContainers []instance.ContainerType) (err error) {
  1004  	ops := []txn.Op{
  1005  		{
  1006  			C:      m.st.machines.Name,
  1007  			Id:     m.doc.Id,
  1008  			Assert: notDeadDoc,
  1009  			Update: D{
  1010  				{"$set", D{
  1011  					{"supportedcontainers", supportedContainers},
  1012  					{"supportedcontainersknown", true},
  1013  				}}},
  1014  		},
  1015  	}
  1016  	if err = m.st.runTransaction(ops); err != nil {
  1017  		return fmt.Errorf("cannot update supported containers of machine %v: %v", m, onAbort(err, errDead))
  1018  	}
  1019  	m.doc.SupportedContainers = supportedContainers
  1020  	m.doc.SupportedContainersKnown = true
  1021  	return nil
  1022  }
  1023  
  1024  // markInvalidContainers sets the status of any container belonging to this machine
  1025  // as being in error if the container type is not supported.
  1026  func (m *Machine) markInvalidContainers() error {
  1027  	currentContainers, err := m.Containers()
  1028  	if err != nil {
  1029  		return err
  1030  	}
  1031  	for _, containerId := range currentContainers {
  1032  		if !isSupportedContainer(ContainerTypeFromId(containerId), m.doc.SupportedContainers) {
  1033  			container, err := m.st.Machine(containerId)
  1034  			if err != nil {
  1035  				logger.Errorf("loading container %v to mark as invalid: %v", containerId, err)
  1036  				continue
  1037  			}
  1038  			// There should never be a circumstance where an unsupported container is started.
  1039  			// Nonetheless, we check and log an error if such a situation arises.
  1040  			status, _, _, err := container.Status()
  1041  			if err != nil {
  1042  				logger.Errorf("finding status of container %v to mark as invalid: %v", containerId, err)
  1043  				continue
  1044  			}
  1045  			if status == params.StatusPending {
  1046  				containerType := ContainerTypeFromId(containerId)
  1047  				container.SetStatus(
  1048  					params.StatusError, "unsupported container", params.StatusData{"type": containerType})
  1049  			} else {
  1050  				logger.Errorf("unsupported container %v has unexpected status %v", containerId, status)
  1051  			}
  1052  		}
  1053  	}
  1054  	return nil
  1055  }