launchpad.net/~rogpeppe/juju-core/500-errgo-fix@v0.0.0-20140213181702-000000002356/state/cleanup.go (about)

     1  package state
     2  
     3  import (
     4  	"labix.org/v2/mgo/bson"
     5  	"labix.org/v2/mgo/txn"
     6  
     7  	errgo "launchpad.net/errgo/errors"
     8  	"launchpad.net/juju-core/errors"
     9  )
    10  
// cleanupDoc represents a potentially large set of documents that should be
// removed.
type cleanupDoc struct {
	// Id uniquely identifies the cleanup document; it is stored under the
	// "_id" key.
	Id     bson.ObjectId `bson:"_id"`
	// Kind selects which cleanup routine Cleanup runs for this document
	// ("settings", "units", "services" or "machine").
	Kind   string
	// Prefix is the argument handed to the selected routine: an _id prefix
	// for "settings" and "units", a machine id for "machine"; it is not
	// used for "services".
	// NOTE(review): Kind and Prefix carry no bson tag, so they are
	// presumably stored under the bson package's default key names --
	// confirm before relying on the stored field names.
	Prefix string
}
    18  
    19  // newCleanupOp returns a txn.Op that creates a cleanup document with a unique
    20  // id and the supplied kind and prefix.
    21  func (st *State) newCleanupOp(kind, prefix string) txn.Op {
    22  	doc := &cleanupDoc{
    23  		Id:     bson.NewObjectId(),
    24  		Kind:   kind,
    25  		Prefix: prefix,
    26  	}
    27  	return txn.Op{
    28  		C:      st.cleanups.Name,
    29  		Id:     doc.Id,
    30  		Insert: doc,
    31  	}
    32  }
    33  
    34  // NeedsCleanup returns true if documents previously marked for removal exist.
    35  func (st *State) NeedsCleanup() (bool, error) {
    36  	count, err := st.cleanups.Count()
    37  	if err != nil {
    38  		return false, mask(err)
    39  	}
    40  	return count > 0, nil
    41  }
    42  
    43  // Cleanup removes all documents that were previously marked for removal, if
    44  // any such exist. It should be called periodically by at least one element
    45  // of the system.
    46  func (st *State) Cleanup() error {
    47  	doc := cleanupDoc{}
    48  	iter := st.cleanups.Find(nil).Iter()
    49  	for iter.Next(&doc) {
    50  		var err error
    51  		logger.Debugf("running %q cleanup: %q", doc.Kind, doc.Prefix)
    52  		switch doc.Kind {
    53  		case "settings":
    54  			err = st.cleanupSettings(doc.Prefix)
    55  		case "units":
    56  			err = st.cleanupUnits(doc.Prefix)
    57  		case "services":
    58  			err = st.cleanupServices()
    59  		case "machine":
    60  			err = st.cleanupMachine(doc.Prefix)
    61  		default:
    62  			err = errgo.Newf("unknown cleanup kind %q", doc.Kind)
    63  		}
    64  		if err != nil {
    65  			logger.Warningf("cleanup failed: %v", err)
    66  			continue
    67  		}
    68  		ops := []txn.Op{{
    69  			C:      st.cleanups.Name,
    70  			Id:     doc.Id,
    71  			Remove: true,
    72  		}}
    73  		if err := st.runTransaction(ops); err != nil {
    74  			logger.Warningf("cannot remove empty cleanup document: %v", err)
    75  		}
    76  	}
    77  	if err := iter.Err(); err != nil {
    78  		return errgo.Notef(err, "cannot read cleanup document")
    79  	}
    80  	return nil
    81  }
    82  
    83  func (st *State) cleanupSettings(prefix string) error {
    84  	// Documents marked for cleanup are not otherwise referenced in the
    85  	// system, and will not be under watch, and are therefore safe to
    86  	// delete directly.
    87  	sel := D{{"_id", D{{"$regex", "^" + prefix}}}}
    88  	if count, err := st.settings.Find(sel).Count(); err != nil {
    89  		return errgo.Notef(err, "cannot detect cleanup targets")
    90  	} else if count != 0 {
    91  		if _, err := st.settings.RemoveAll(sel); err != nil {
    92  			return errgo.Notef(err, "cannot remove documents marked for cleanup")
    93  		}
    94  	}
    95  	return nil
    96  }
    97  
    98  // cleanupServices sets all services to Dying, if they are not already Dying
    99  // or Dead. It's expected to be used when an environment is destroyed.
   100  func (st *State) cleanupServices() error {
   101  	// This won't miss services, because a Dying environment cannot have
   102  	// services added to it. But we do have to remove the services themselves
   103  	// via individual transactions, because they could be in any state at all.
   104  	service := &Service{st: st}
   105  	sel := D{{"life", Alive}}
   106  	iter := st.services.Find(sel).Iter()
   107  	for iter.Next(&service.doc) {
   108  		if err := service.Destroy(); err != nil {
   109  			return mask(err)
   110  		}
   111  	}
   112  	if err := iter.Err(); err != nil {
   113  		return errgo.Notef(err, "cannot read service document")
   114  	}
   115  	return nil
   116  }
   117  
   118  // cleanupUnits sets all units with the given prefix to Dying, if they are not
   119  // already Dying or Dead. It's expected to be used when a service is destroyed.
   120  func (st *State) cleanupUnits(prefix string) error {
   121  	// This won't miss units, because a Dying service cannot have units added
   122  	// to it. But we do have to remove the units themselves via individual
   123  	// transactions, because they could be in any state at all.
   124  	unit := &Unit{st: st}
   125  	sel := D{{"_id", D{{"$regex", "^" + prefix}}}, {"life", Alive}}
   126  	iter := st.units.Find(sel).Iter()
   127  	for iter.Next(&unit.doc) {
   128  		if err := unit.Destroy(); err != nil {
   129  			return mask(err)
   130  		}
   131  	}
   132  	if err := iter.Err(); err != nil {
   133  		return errgo.Notef(err, "cannot read unit document")
   134  	}
   135  	return nil
   136  }
   137  
   138  // cleanupMachine systematically destroys and removes all entities that
   139  // depend upon the supplied machine, and removes the machine from state. It's
   140  // expected to be used in response to destroy-machine --force.
   141  func (st *State) cleanupMachine(machineId string) error {
   142  	machine, err := st.Machine(machineId)
   143  	if errors.IsNotFoundError(err) {
   144  		return nil
   145  	} else if err != nil {
   146  		return mask(err)
   147  	}
   148  
   149  	// In an ideal world, we'd call machine.Destroy() here, and thus prevent
   150  	// new dependencies being added while we clean up the ones we know about.
   151  	// But machine destruction is unsophisticated, and doesn't allow for
   152  	// destruction while dependencies exist; so we just have to deal with that
   153  	// possibility below.
   154  	if err := st.cleanupContainers(machine); err != nil {
   155  		return mask(err)
   156  	}
   157  	for _, unitName := range machine.doc.Principals {
   158  		if err := st.obliterateUnit(unitName); err != nil {
   159  			return mask(err)
   160  		}
   161  	}
   162  	// We need to refresh the machine at this point, because the local copy
   163  	// of the document will not reflect changes caused by the unit cleanups
   164  	// above, and may thus fail immediately.
   165  	if err := machine.Refresh(); errors.IsNotFoundError(err) {
   166  		return nil
   167  	} else if err != nil {
   168  		return mask(err)
   169  	}
   170  
   171  	// TODO(fwereade): 2013-11-11 bug 1250104
   172  	// If this fails, it's *probably* due to a race in which new dependencies
   173  	// were added while we cleaned up the old ones. If the cleanup doesn't run
   174  	// again -- which it *probably* will anyway -- the issue can be resolved by
   175  	// force-destroying the machine again; that's better than adding layer
   176  	// upon layer of complication here.
   177  	return machine.EnsureDead()
   178  
   179  	// Note that we do *not* remove the machine entirely: we leave it for the
   180  	// provisioner to clean up, so that we don't end up with an unreferenced
   181  	// instance that would otherwise be ignored when in provisioner-safe-mode.
   182  }
   183  
   184  // cleanupContainers recursively calls cleanupMachine on the supplied
   185  // machine's containers, and removes them from state entirely.
   186  func (st *State) cleanupContainers(machine *Machine) error {
   187  	containerIds, err := machine.Containers()
   188  	if errors.IsNotFoundError(err) {
   189  		return nil
   190  	} else if err != nil {
   191  		return mask(err)
   192  	}
   193  	for _, containerId := range containerIds {
   194  		if err := st.cleanupMachine(containerId); err != nil {
   195  			return mask(err)
   196  		}
   197  		container, err := st.Machine(containerId)
   198  		if errors.IsNotFoundError(err) {
   199  			return nil
   200  		} else if err != nil {
   201  			return mask(err)
   202  		}
   203  		if err := container.Remove(); err != nil {
   204  			return mask(err)
   205  		}
   206  	}
   207  	return nil
   208  }
   209  
   210  // obliterateUnit removes a unit from state completely. It is not safe or
   211  // sane to obliterate any unit in isolation; its only reasonable use is in
   212  // the context of machine obliteration, in which we can be sure that unclean
   213  // shutdown of units is not going to leave a machine in a difficult state.
   214  func (st *State) obliterateUnit(unitName string) error {
   215  	unit, err := st.Unit(unitName)
   216  	if errors.IsNotFoundError(err) {
   217  		return nil
   218  	} else if err != nil {
   219  		return mask(err)
   220  	}
   221  
   222  	// Unlike the machine, we *can* always destroy the unit, and (at least)
   223  	// prevent further dependencies being added. If we're really lucky, the
   224  	// unit will be removed immediately.
   225  	if err := unit.Destroy(); err != nil {
   226  		return mask(err)
   227  	}
   228  	if err := unit.Refresh(); errors.IsNotFoundError(err) {
   229  		return nil
   230  	} else if err != nil {
   231  		return mask(err)
   232  	}
   233  	for _, subName := range unit.SubordinateNames() {
   234  		if err := st.obliterateUnit(subName); err != nil {
   235  			return mask(err)
   236  		}
   237  	}
   238  	if err := unit.EnsureDead(); err != nil {
   239  		return mask(err)
   240  	}
   241  	return unit.Remove()
   242  }