github.com/cloudbase/juju-core@v0.0.0-20140504232958-a7271ac7912f/state/cleanup.go (about)

     1  package state
     2  
     3  import (
     4  	"fmt"
     5  
     6  	"labix.org/v2/mgo/bson"
     7  	"labix.org/v2/mgo/txn"
     8  
     9  	"launchpad.net/juju-core/errors"
    10  )
    11  
// cleanupDoc represents a potentially large set of documents that should be
// removed. One such document is stored per pending cleanup; Cleanup reads
// them back and dispatches on Kind.
type cleanupDoc struct {
	// Id is the document key; newCleanupOp fills it with a freshly
	// generated ObjectId.
	Id     bson.ObjectId `bson:"_id"`
	// Kind names the cleanup routine to run ("settings", "units",
	// "services" or "machine" are the values Cleanup recognises).
	Kind   string
	// Prefix is the argument handed to the routine selected by Kind: an _id
	// prefix for "settings" and "units", a machine id for "machine". The
	// "services" routine does not use it.
	Prefix string
}
    19  
    20  // newCleanupOp returns a txn.Op that creates a cleanup document with a unique
    21  // id and the supplied kind and prefix.
    22  func (st *State) newCleanupOp(kind, prefix string) txn.Op {
    23  	doc := &cleanupDoc{
    24  		Id:     bson.NewObjectId(),
    25  		Kind:   kind,
    26  		Prefix: prefix,
    27  	}
    28  	return txn.Op{
    29  		C:      st.cleanups.Name,
    30  		Id:     doc.Id,
    31  		Insert: doc,
    32  	}
    33  }
    34  
    35  // NeedsCleanup returns true if documents previously marked for removal exist.
    36  func (st *State) NeedsCleanup() (bool, error) {
    37  	count, err := st.cleanups.Count()
    38  	if err != nil {
    39  		return false, err
    40  	}
    41  	return count > 0, nil
    42  }
    43  
    44  // Cleanup removes all documents that were previously marked for removal, if
    45  // any such exist. It should be called periodically by at least one element
    46  // of the system.
    47  func (st *State) Cleanup() error {
    48  	doc := cleanupDoc{}
    49  	iter := st.cleanups.Find(nil).Iter()
    50  	for iter.Next(&doc) {
    51  		var err error
    52  		logger.Debugf("running %q cleanup: %q", doc.Kind, doc.Prefix)
    53  		switch doc.Kind {
    54  		case "settings":
    55  			err = st.cleanupSettings(doc.Prefix)
    56  		case "units":
    57  			err = st.cleanupUnits(doc.Prefix)
    58  		case "services":
    59  			err = st.cleanupServices()
    60  		case "machine":
    61  			err = st.cleanupMachine(doc.Prefix)
    62  		default:
    63  			err = fmt.Errorf("unknown cleanup kind %q", doc.Kind)
    64  		}
    65  		if err != nil {
    66  			logger.Warningf("cleanup failed: %v", err)
    67  			continue
    68  		}
    69  		ops := []txn.Op{{
    70  			C:      st.cleanups.Name,
    71  			Id:     doc.Id,
    72  			Remove: true,
    73  		}}
    74  		if err := st.runTransaction(ops); err != nil {
    75  			logger.Warningf("cannot remove empty cleanup document: %v", err)
    76  		}
    77  	}
    78  	if err := iter.Err(); err != nil {
    79  		return fmt.Errorf("cannot read cleanup document: %v", err)
    80  	}
    81  	return nil
    82  }
    83  
    84  func (st *State) cleanupSettings(prefix string) error {
    85  	// Documents marked for cleanup are not otherwise referenced in the
    86  	// system, and will not be under watch, and are therefore safe to
    87  	// delete directly.
    88  	sel := D{{"_id", D{{"$regex", "^" + prefix}}}}
    89  	if count, err := st.settings.Find(sel).Count(); err != nil {
    90  		return fmt.Errorf("cannot detect cleanup targets: %v", err)
    91  	} else if count != 0 {
    92  		if _, err := st.settings.RemoveAll(sel); err != nil {
    93  			return fmt.Errorf("cannot remove documents marked for cleanup: %v", err)
    94  		}
    95  	}
    96  	return nil
    97  }
    98  
    99  // cleanupServices sets all services to Dying, if they are not already Dying
   100  // or Dead. It's expected to be used when an environment is destroyed.
   101  func (st *State) cleanupServices() error {
   102  	// This won't miss services, because a Dying environment cannot have
   103  	// services added to it. But we do have to remove the services themselves
   104  	// via individual transactions, because they could be in any state at all.
   105  	service := &Service{st: st}
   106  	sel := D{{"life", Alive}}
   107  	iter := st.services.Find(sel).Iter()
   108  	for iter.Next(&service.doc) {
   109  		if err := service.Destroy(); err != nil {
   110  			return err
   111  		}
   112  	}
   113  	if err := iter.Err(); err != nil {
   114  		return fmt.Errorf("cannot read service document: %v", err)
   115  	}
   116  	return nil
   117  }
   118  
   119  // cleanupUnits sets all units with the given prefix to Dying, if they are not
   120  // already Dying or Dead. It's expected to be used when a service is destroyed.
   121  func (st *State) cleanupUnits(prefix string) error {
   122  	// This won't miss units, because a Dying service cannot have units added
   123  	// to it. But we do have to remove the units themselves via individual
   124  	// transactions, because they could be in any state at all.
   125  	unit := &Unit{st: st}
   126  	sel := D{{"_id", D{{"$regex", "^" + prefix}}}, {"life", Alive}}
   127  	iter := st.units.Find(sel).Iter()
   128  	for iter.Next(&unit.doc) {
   129  		if err := unit.Destroy(); err != nil {
   130  			return err
   131  		}
   132  	}
   133  	if err := iter.Err(); err != nil {
   134  		return fmt.Errorf("cannot read unit document: %v", err)
   135  	}
   136  	return nil
   137  }
   138  
   139  // cleanupMachine systematically destroys and removes all entities that
   140  // depend upon the supplied machine, and removes the machine from state. It's
   141  // expected to be used in response to destroy-machine --force.
   142  func (st *State) cleanupMachine(machineId string) error {
   143  	machine, err := st.Machine(machineId)
   144  	if errors.IsNotFoundError(err) {
   145  		return nil
   146  	} else if err != nil {
   147  		return err
   148  	}
   149  	// In an ideal world, we'd call machine.Destroy() here, and thus prevent
   150  	// new dependencies being added while we clean up the ones we know about.
   151  	// But machine destruction is unsophisticated, and doesn't allow for
   152  	// destruction while dependencies exist; so we just have to deal with that
   153  	// possibility below.
   154  	if err := st.cleanupContainers(machine); err != nil {
   155  		return err
   156  	}
   157  	for _, unitName := range machine.doc.Principals {
   158  		if err := st.obliterateUnit(unitName); err != nil {
   159  			return err
   160  		}
   161  	}
   162  	// We need to refresh the machine at this point, because the local copy
   163  	// of the document will not reflect changes caused by the unit cleanups
   164  	// above, and may thus fail immediately.
   165  	if err := machine.Refresh(); errors.IsNotFoundError(err) {
   166  		return nil
   167  	} else if err != nil {
   168  		return err
   169  	}
   170  	// TODO(fwereade): 2013-11-11 bug 1250104
   171  	// If this fails, it's *probably* due to a race in which new dependencies
   172  	// were added while we cleaned up the old ones. If the cleanup doesn't run
   173  	// again -- which it *probably* will anyway -- the issue can be resolved by
   174  	// force-destroying the machine again; that's better than adding layer
   175  	// upon layer of complication here.
   176  	return machine.EnsureDead()
   177  
   178  	// Note that we do *not* remove the machine entirely: we leave it for the
   179  	// provisioner to clean up, so that we don't end up with an unreferenced
   180  	// instance that would otherwise be ignored when in provisioner-safe-mode.
   181  }
   182  
   183  // cleanupContainers recursively calls cleanupMachine on the supplied
   184  // machine's containers, and removes them from state entirely.
   185  func (st *State) cleanupContainers(machine *Machine) error {
   186  	containerIds, err := machine.Containers()
   187  	if errors.IsNotFoundError(err) {
   188  		return nil
   189  	} else if err != nil {
   190  		return err
   191  	}
   192  	for _, containerId := range containerIds {
   193  		if err := st.cleanupMachine(containerId); err != nil {
   194  			return err
   195  		}
   196  		container, err := st.Machine(containerId)
   197  		if errors.IsNotFoundError(err) {
   198  			return nil
   199  		} else if err != nil {
   200  			return err
   201  		}
   202  		if err := container.Remove(); err != nil {
   203  			return err
   204  		}
   205  	}
   206  	return nil
   207  }
   208  
   209  // obliterateUnit removes a unit from state completely. It is not safe or
   210  // sane to obliterate any unit in isolation; its only reasonable use is in
   211  // the context of machine obliteration, in which we can be sure that unclean
   212  // shutdown of units is not going to leave a machine in a difficult state.
   213  func (st *State) obliterateUnit(unitName string) error {
   214  	unit, err := st.Unit(unitName)
   215  	if errors.IsNotFoundError(err) {
   216  		return nil
   217  	} else if err != nil {
   218  		return err
   219  	}
   220  	// Unlike the machine, we *can* always destroy the unit, and (at least)
   221  	// prevent further dependencies being added. If we're really lucky, the
   222  	// unit will be removed immediately.
   223  	if err := unit.Destroy(); err != nil {
   224  		return err
   225  	}
   226  	if err := unit.Refresh(); errors.IsNotFoundError(err) {
   227  		return nil
   228  	} else if err != nil {
   229  		return err
   230  	}
   231  	for _, subName := range unit.SubordinateNames() {
   232  		if err := st.obliterateUnit(subName); err != nil {
   233  			return err
   234  		}
   235  	}
   236  	if err := unit.EnsureDead(); err != nil {
   237  		return err
   238  	}
   239  	return unit.Remove()
   240  }