github.com/makyo/juju@v0.0.0-20160425123129-2608902037e9/state/cleanup.go (about)

     1  // Copyright 2014 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package state
     5  
import (
	"fmt"
	"regexp"
	"strings"

	"github.com/juju/errors"
	"github.com/juju/names"
	"gopkg.in/mgo.v2"
	"gopkg.in/mgo.v2/bson"
	"gopkg.in/mgo.v2/txn"
)
    16  
// cleanupKind names the sort of work a cleanup document describes. Cleanup
// switches on this value to pick the routine to run, and falls back to the
// handlers registered via RegisterCleanupHandler for any other value.
type cleanupKind string
    18  
// The cleanup kinds that Cleanup handles directly. Each constant's string
// value is what ends up in the Kind field of a cleanupDoc, so changing a
// value changes what is written to (and recognised from) the cleanups
// collection.
const (
	// SCHEMACHANGE: the names are expressive, the values not so much.
	cleanupRelationSettings              cleanupKind = "settings"
	cleanupUnitsForDyingService          cleanupKind = "units"
	cleanupDyingUnit                     cleanupKind = "dyingUnit"
	cleanupRemovedUnit                   cleanupKind = "removedUnit"
	cleanupServicesForDyingModel         cleanupKind = "services"
	cleanupDyingMachine                  cleanupKind = "dyingMachine"
	cleanupForceDestroyedMachine         cleanupKind = "machine"
	cleanupAttachmentsForDyingStorage    cleanupKind = "storageAttachments"
	cleanupAttachmentsForDyingVolume     cleanupKind = "volumeAttachments"
	cleanupAttachmentsForDyingFilesystem cleanupKind = "filesystemAttachments"
	cleanupModelsForDyingController      cleanupKind = "models"
	cleanupMachinesForDyingModel         cleanupKind = "modelMachines"
)
    34  
// cleanupDoc represents a potentially large set of documents that should be
// removed.
//
// DocID is the document's _id; newCleanupOp derives it from a freshly
// generated object id passed through st.docID. ModelUUID is filled in from
// st.ModelUUID(). Kind selects the routine Cleanup runs for the document,
// and Prefix is the single string argument handed to that routine; its
// meaning depends on Kind (for example a settings id prefix, a service
// name, or a machine id), and some kinds ignore it.
type cleanupDoc struct {
	DocID     string `bson:"_id"`
	ModelUUID string `bson:"model-uuid"`
	Kind      cleanupKind
	Prefix    string
}
    43  
    44  // newCleanupOp returns a txn.Op that creates a cleanup document with a unique
    45  // id and the supplied kind and prefix.
    46  func (st *State) newCleanupOp(kind cleanupKind, prefix string) txn.Op {
    47  	doc := &cleanupDoc{
    48  		DocID:     st.docID(fmt.Sprint(bson.NewObjectId())),
    49  		ModelUUID: st.ModelUUID(),
    50  		Kind:      kind,
    51  		Prefix:    prefix,
    52  	}
    53  	return txn.Op{
    54  		C:      cleanupsC,
    55  		Id:     doc.DocID,
    56  		Insert: doc,
    57  	}
    58  }
    59  
    60  // NeedsCleanup returns true if documents previously marked for removal exist.
    61  func (st *State) NeedsCleanup() (bool, error) {
    62  	cleanups, closer := st.getCollection(cleanupsC)
    63  	defer closer()
    64  	count, err := cleanups.Count()
    65  	if err != nil {
    66  		return false, err
    67  	}
    68  	return count > 0, nil
    69  }
    70  
    71  // Cleanup removes all documents that were previously marked for removal, if
    72  // any such exist. It should be called periodically by at least one element
    73  // of the system.
    74  func (st *State) Cleanup() (err error) {
    75  	var doc cleanupDoc
    76  	cleanups, closer := st.getCollection(cleanupsC)
    77  	defer closer()
    78  	iter := cleanups.Find(nil).Iter()
    79  	defer closeIter(iter, &err, "reading cleanup document")
    80  	for iter.Next(&doc) {
    81  		var err error
    82  		logger.Debugf("running %q cleanup: %q", doc.Kind, doc.Prefix)
    83  		switch doc.Kind {
    84  		case cleanupRelationSettings:
    85  			err = st.cleanupRelationSettings(doc.Prefix)
    86  		case cleanupUnitsForDyingService:
    87  			err = st.cleanupUnitsForDyingService(doc.Prefix)
    88  		case cleanupDyingUnit:
    89  			err = st.cleanupDyingUnit(doc.Prefix)
    90  		case cleanupRemovedUnit:
    91  			err = st.cleanupRemovedUnit(doc.Prefix)
    92  		case cleanupServicesForDyingModel:
    93  			err = st.cleanupServicesForDyingModel()
    94  		case cleanupDyingMachine:
    95  			err = st.cleanupDyingMachine(doc.Prefix)
    96  		case cleanupForceDestroyedMachine:
    97  			err = st.cleanupForceDestroyedMachine(doc.Prefix)
    98  		case cleanupAttachmentsForDyingStorage:
    99  			err = st.cleanupAttachmentsForDyingStorage(doc.Prefix)
   100  		case cleanupAttachmentsForDyingVolume:
   101  			err = st.cleanupAttachmentsForDyingVolume(doc.Prefix)
   102  		case cleanupAttachmentsForDyingFilesystem:
   103  			err = st.cleanupAttachmentsForDyingFilesystem(doc.Prefix)
   104  		case cleanupModelsForDyingController:
   105  			err = st.cleanupModelsForDyingController()
   106  		case cleanupMachinesForDyingModel:
   107  			err = st.cleanupMachinesForDyingModel()
   108  		default:
   109  			handler, ok := cleanupHandlers[doc.Kind]
   110  			if !ok {
   111  				err = fmt.Errorf("unknown cleanup kind %q", doc.Kind)
   112  			} else {
   113  				persist := st.newPersistence()
   114  				err = handler(st, persist, doc.Prefix)
   115  			}
   116  		}
   117  		if err != nil {
   118  			logger.Warningf("cleanup failed: %v", err)
   119  			continue
   120  		}
   121  		ops := []txn.Op{{
   122  			C:      cleanupsC,
   123  			Id:     doc.DocID,
   124  			Remove: true,
   125  		}}
   126  		if err := st.runTransaction(ops); err != nil {
   127  			logger.Warningf("cannot remove empty cleanup document: %v", err)
   128  		}
   129  	}
   130  	return nil
   131  }
   132  
// CleanupHandler is a function that state may call during cleanup
// to perform cleanup actions for some cleanup type.
//
// Cleanup invokes a registered handler for cleanup documents whose kind is
// not one of the built-in kinds, passing the State, a Persistence obtained
// from st.newPersistence(), and the document's prefix. When the handler
// returns a non-nil error the failure is logged and the cleanup document is
// left in place.
type CleanupHandler func(st *State, persist Persistence, prefix string) error
   136  
   137  var cleanupHandlers = map[cleanupKind]CleanupHandler{}
   138  
   139  // RegisterCleanupHandler identifies the handler to use a given
   140  // cleanup kind.
   141  func RegisterCleanupHandler(kindStr string, handler CleanupHandler) error {
   142  	kind := cleanupKind(kindStr)
   143  	if _, ok := cleanupHandlers[kind]; ok {
   144  		return errors.NewAlreadyExists(nil, fmt.Sprintf("cleanup handler for %q already registered", kindStr))
   145  	}
   146  	cleanupHandlers[kind] = handler
   147  	return nil
   148  }
   149  
   150  func (st *State) cleanupRelationSettings(prefix string) error {
   151  	settings, closer := st.getCollection(settingsC)
   152  	defer closer()
   153  	// Documents marked for cleanup are not otherwise referenced in the
   154  	// system, and will not be under watch, and are therefore safe to
   155  	// delete directly.
   156  	settingsW := settings.Writeable()
   157  
   158  	sel := bson.D{{"_id", bson.D{{"$regex", "^" + st.docID(prefix)}}}}
   159  	if count, err := settingsW.Find(sel).Count(); err != nil {
   160  		return fmt.Errorf("cannot detect cleanup targets: %v", err)
   161  	} else if count != 0 {
   162  		if _, err := settingsW.RemoveAll(sel); err != nil {
   163  			return fmt.Errorf("cannot remove documents marked for cleanup: %v", err)
   164  		}
   165  	}
   166  	return nil
   167  }
   168  
   169  // cleanupModelsForDyingController sets all models to dying, if
   170  // they are not already Dying or Dead. It's expected to be used when a
   171  // controller is destroyed.
   172  func (st *State) cleanupModelsForDyingController() (err error) {
   173  	models, err := st.AllModels()
   174  	if err != nil {
   175  		return errors.Trace(err)
   176  	}
   177  	for _, env := range models {
   178  
   179  		if env.Life() == Alive {
   180  			if err := env.Destroy(); err != nil {
   181  				return errors.Trace(err)
   182  			}
   183  		}
   184  	}
   185  	return nil
   186  }
   187  
   188  // cleanupMachinesForDyingModel sets all non-manager, non-manual
   189  // machines to Dying, if they are not already Dying or Dead. It's expected to
   190  // be used when a model is destroyed.
   191  func (st *State) cleanupMachinesForDyingModel() (err error) {
   192  	// This won't miss machines, because a Dying model cannot have
   193  	// machines added to it. But we do have to remove the machines themselves
   194  	// via individual transactions, because they could be in any state at all.
   195  	machines, err := st.AllMachines()
   196  	if err != nil {
   197  		return errors.Trace(err)
   198  	}
   199  	for _, m := range machines {
   200  		if m.IsManager() {
   201  			continue
   202  		}
   203  		if _, isContainer := m.ParentId(); isContainer {
   204  			continue
   205  		}
   206  		manual, err := m.IsManual()
   207  		if err != nil {
   208  			return err
   209  		} else if manual {
   210  			continue
   211  		}
   212  		err = m.ForceDestroy()
   213  		if err != nil {
   214  			return errors.Trace(err)
   215  		}
   216  	}
   217  	return nil
   218  }
   219  
   220  // cleanupServicesForDyingModel sets all services to Dying, if they are
   221  // not already Dying or Dead. It's expected to be used when a model is
   222  // destroyed.
   223  func (st *State) cleanupServicesForDyingModel() (err error) {
   224  	// This won't miss services, because a Dying model cannot have
   225  	// services added to it. But we do have to remove the services themselves
   226  	// via individual transactions, because they could be in any state at all.
   227  	services, closer := st.getCollection(servicesC)
   228  	defer closer()
   229  	service := Service{st: st}
   230  	sel := bson.D{{"life", Alive}}
   231  	iter := services.Find(sel).Iter()
   232  	defer closeIter(iter, &err, "reading service document")
   233  	for iter.Next(&service.doc) {
   234  		if err := service.Destroy(); err != nil {
   235  			return err
   236  		}
   237  	}
   238  	return nil
   239  }
   240  
   241  // cleanupUnitsForDyingService sets all units with the given prefix to Dying,
   242  // if they are not already Dying or Dead. It's expected to be used when a
   243  // service is destroyed.
   244  func (st *State) cleanupUnitsForDyingService(serviceName string) (err error) {
   245  	// This won't miss units, because a Dying service cannot have units added
   246  	// to it. But we do have to remove the units themselves via individual
   247  	// transactions, because they could be in any state at all.
   248  	units, closer := st.getCollection(unitsC)
   249  	defer closer()
   250  
   251  	// TODO(mjs) - remove this post v1.21
   252  	// Older versions of the code put a trailing forward slash on the
   253  	// end of the service name. Remove it here in case a pre-upgrade
   254  	// cleanup document is seen.
   255  	serviceName = strings.TrimSuffix(serviceName, "/")
   256  
   257  	unit := Unit{st: st}
   258  	sel := bson.D{{"service", serviceName}, {"life", Alive}}
   259  	iter := units.Find(sel).Iter()
   260  	defer closeIter(iter, &err, "reading unit document")
   261  	for iter.Next(&unit.doc) {
   262  		if err := unit.Destroy(); err != nil {
   263  			return err
   264  		}
   265  	}
   266  	return nil
   267  }
   268  
   269  // cleanupDyingUnit marks resources owned by the unit as dying, to ensure
   270  // they are cleaned up as well.
   271  func (st *State) cleanupDyingUnit(name string) error {
   272  	unit, err := st.Unit(name)
   273  	if errors.IsNotFound(err) {
   274  		return nil
   275  	} else if err != nil {
   276  		return err
   277  	}
   278  	// Mark the unit as departing from its joined relations, allowing
   279  	// related units to start converging to a state in which that unit
   280  	// is gone as quickly as possible.
   281  	relations, err := unit.RelationsJoined()
   282  	if err != nil {
   283  		return err
   284  	}
   285  	for _, relation := range relations {
   286  		relationUnit, err := relation.Unit(unit)
   287  		if errors.IsNotFound(err) {
   288  			continue
   289  		} else if err != nil {
   290  			return err
   291  		}
   292  		if err := relationUnit.PrepareLeaveScope(); err != nil {
   293  			return err
   294  		}
   295  	}
   296  	// Mark storage attachments as dying, so that they are detached
   297  	// and removed from state, allowing the unit to terminate.
   298  	storageAttachments, err := st.UnitStorageAttachments(unit.UnitTag())
   299  	if err != nil {
   300  		return err
   301  	}
   302  	for _, storageAttachment := range storageAttachments {
   303  		err := st.DestroyStorageAttachment(
   304  			storageAttachment.StorageInstance(), unit.UnitTag(),
   305  		)
   306  		if errors.IsNotFound(err) {
   307  			continue
   308  		} else if err != nil {
   309  			return err
   310  		}
   311  	}
   312  	return nil
   313  }
   314  
   315  // cleanupRemovedUnit takes care of all the final cleanup required when
   316  // a unit is removed.
   317  func (st *State) cleanupRemovedUnit(unitId string) error {
   318  	actions, err := st.matchingActionsByReceiverId(unitId)
   319  	if err != nil {
   320  		return err
   321  	}
   322  
   323  	cancelled := ActionResults{Status: ActionCancelled, Message: "unit removed"}
   324  	for _, action := range actions {
   325  		if _, err = action.Finish(cancelled); err != nil {
   326  			return err
   327  		}
   328  	}
   329  	return nil
   330  }
   331  
   332  // cleanupDyingMachine marks resources owned by the machine as dying, to ensure
   333  // they are cleaned up as well.
   334  func (st *State) cleanupDyingMachine(machineId string) error {
   335  	machine, err := st.Machine(machineId)
   336  	if errors.IsNotFound(err) {
   337  		return nil
   338  	} else if err != nil {
   339  		return err
   340  	}
   341  	return cleanupDyingMachineResources(machine)
   342  }
   343  
   344  // cleanupForceDestroyedMachine systematically destroys and removes all entities
   345  // that depend upon the supplied machine, and removes the machine from state. It's
   346  // expected to be used in response to destroy-machine --force.
   347  func (st *State) cleanupForceDestroyedMachine(machineId string) error {
   348  	machine, err := st.Machine(machineId)
   349  	if errors.IsNotFound(err) {
   350  		return nil
   351  	} else if err != nil {
   352  		return err
   353  	}
   354  	if err := cleanupDyingMachineResources(machine); err != nil {
   355  		return err
   356  	}
   357  	// In an ideal world, we'd call machine.Destroy() here, and thus prevent
   358  	// new dependencies being added while we clean up the ones we know about.
   359  	// But machine destruction is unsophisticated, and doesn't allow for
   360  	// destruction while dependencies exist; so we just have to deal with that
   361  	// possibility below.
   362  	if err := st.cleanupContainers(machine); err != nil {
   363  		return err
   364  	}
   365  	for _, unitName := range machine.doc.Principals {
   366  		if err := st.obliterateUnit(unitName); err != nil {
   367  			return err
   368  		}
   369  	}
   370  	// We need to refresh the machine at this point, because the local copy
   371  	// of the document will not reflect changes caused by the unit cleanups
   372  	// above, and may thus fail immediately.
   373  	if err := machine.Refresh(); errors.IsNotFound(err) {
   374  		return nil
   375  	} else if err != nil {
   376  		return err
   377  	}
   378  	// TODO(fwereade): 2013-11-11 bug 1250104
   379  	// If this fails, it's *probably* due to a race in which new dependencies
   380  	// were added while we cleaned up the old ones. If the cleanup doesn't run
   381  	// again -- which it *probably* will anyway -- the issue can be resolved by
   382  	// force-destroying the machine again; that's better than adding layer
   383  	// upon layer of complication here.
   384  	if err := machine.EnsureDead(); err != nil {
   385  		return err
   386  	}
   387  	removePortsOps, err := machine.removePortsOps()
   388  	if err != nil {
   389  		return err
   390  	}
   391  	return st.runTransaction(removePortsOps)
   392  
   393  	// Note that we do *not* remove the machine entirely: we leave it for the
   394  	// provisioner to clean up, so that we don't end up with an unreferenced
   395  	// instance that would otherwise be ignored when in provisioner-safe-mode.
   396  }
   397  
   398  // cleanupContainers recursively calls cleanupForceDestroyedMachine on the supplied
   399  // machine's containers, and removes them from state entirely.
   400  func (st *State) cleanupContainers(machine *Machine) error {
   401  	containerIds, err := machine.Containers()
   402  	if errors.IsNotFound(err) {
   403  		return nil
   404  	} else if err != nil {
   405  		return err
   406  	}
   407  	for _, containerId := range containerIds {
   408  		if err := st.cleanupForceDestroyedMachine(containerId); err != nil {
   409  			return err
   410  		}
   411  		container, err := st.Machine(containerId)
   412  		if errors.IsNotFound(err) {
   413  			return nil
   414  		} else if err != nil {
   415  			return err
   416  		}
   417  		if err := container.Remove(); err != nil {
   418  			return err
   419  		}
   420  	}
   421  	return nil
   422  }
   423  
   424  func cleanupDyingMachineResources(m *Machine) error {
   425  	volumeAttachments, err := m.st.MachineVolumeAttachments(m.MachineTag())
   426  	if err != nil {
   427  		return errors.Annotate(err, "getting machine volume attachments")
   428  	}
   429  	for _, va := range volumeAttachments {
   430  		if err := m.st.DetachVolume(va.Machine(), va.Volume()); err != nil {
   431  			if IsContainsFilesystem(err) {
   432  				// The volume will be destroyed when the
   433  				// contained filesystem is removed, whose
   434  				// destruction is initiated below.
   435  				continue
   436  			}
   437  			return errors.Trace(err)
   438  		}
   439  	}
   440  	filesystemAttachments, err := m.st.MachineFilesystemAttachments(m.MachineTag())
   441  	if err != nil {
   442  		return errors.Annotate(err, "getting machine filesystem attachments")
   443  	}
   444  	for _, fsa := range filesystemAttachments {
   445  		if err := m.st.DetachFilesystem(fsa.Machine(), fsa.Filesystem()); err != nil {
   446  			return errors.Trace(err)
   447  		}
   448  	}
   449  	return nil
   450  }
   451  
   452  // obliterateUnit removes a unit from state completely. It is not safe or
   453  // sane to obliterate any unit in isolation; its only reasonable use is in
   454  // the context of machine obliteration, in which we can be sure that unclean
   455  // shutdown of units is not going to leave a machine in a difficult state.
   456  func (st *State) obliterateUnit(unitName string) error {
   457  	unit, err := st.Unit(unitName)
   458  	if errors.IsNotFound(err) {
   459  		return nil
   460  	} else if err != nil {
   461  		return err
   462  	}
   463  	// Unlike the machine, we *can* always destroy the unit, and (at least)
   464  	// prevent further dependencies being added. If we're really lucky, the
   465  	// unit will be removed immediately.
   466  	if err := unit.Destroy(); err != nil {
   467  		return errors.Annotatef(err, "cannot destroy unit %q", unitName)
   468  	}
   469  	if err := unit.Refresh(); errors.IsNotFound(err) {
   470  		return nil
   471  	} else if err != nil {
   472  		return err
   473  	}
   474  	for _, subName := range unit.SubordinateNames() {
   475  		if err := st.obliterateUnit(subName); err != nil {
   476  			return err
   477  		}
   478  	}
   479  	if err := unit.EnsureDead(); err != nil {
   480  		return err
   481  	}
   482  	return unit.Remove()
   483  }
   484  
   485  // cleanupAttachmentsForDyingStorage sets all storage attachments related
   486  // to the specified storage instance to Dying, if they are not already Dying
   487  // or Dead. It's expected to be used when a storage instance is destroyed.
   488  func (st *State) cleanupAttachmentsForDyingStorage(storageId string) (err error) {
   489  	storageTag := names.NewStorageTag(storageId)
   490  
   491  	// This won't miss attachments, because a Dying storage instance cannot
   492  	// have attachments added to it. But we do have to remove the attachments
   493  	// themselves via individual transactions, because they could be in
   494  	// any state at all.
   495  	coll, closer := st.getCollection(storageAttachmentsC)
   496  	defer closer()
   497  
   498  	var doc storageAttachmentDoc
   499  	fields := bson.D{{"unitid", 1}}
   500  	iter := coll.Find(bson.D{{"storageid", storageId}}).Select(fields).Iter()
   501  	defer closeIter(iter, &err, "reading storage attachment document")
   502  	for iter.Next(&doc) {
   503  		unitTag := names.NewUnitTag(doc.Unit)
   504  		if err := st.DestroyStorageAttachment(storageTag, unitTag); err != nil {
   505  			return errors.Annotate(err, "destroying storage attachment")
   506  		}
   507  	}
   508  	return nil
   509  }
   510  
   511  // cleanupAttachmentsForDyingVolume sets all volume attachments related
   512  // to the specified volume to Dying, if they are not already Dying or
   513  // Dead. It's expected to be used when a volume is destroyed.
   514  func (st *State) cleanupAttachmentsForDyingVolume(volumeId string) (err error) {
   515  	volumeTag := names.NewVolumeTag(volumeId)
   516  
   517  	// This won't miss attachments, because a Dying volume cannot have
   518  	// attachments added to it. But we do have to remove the attachments
   519  	// themselves via individual transactions, because they could be in
   520  	// any state at all.
   521  	coll, closer := st.getCollection(volumeAttachmentsC)
   522  	defer closer()
   523  
   524  	var doc volumeAttachmentDoc
   525  	fields := bson.D{{"machineid", 1}}
   526  	iter := coll.Find(bson.D{{"volumeid", volumeId}}).Select(fields).Iter()
   527  	defer closeIter(iter, &err, "reading volume attachment document")
   528  	for iter.Next(&doc) {
   529  		machineTag := names.NewMachineTag(doc.Machine)
   530  		if err := st.DetachVolume(machineTag, volumeTag); err != nil {
   531  			return errors.Annotate(err, "destroying volume attachment")
   532  		}
   533  	}
   534  	return nil
   535  }
   536  
   537  // cleanupAttachmentsForDyingFilesystem sets all filesystem attachments related
   538  // to the specified filesystem to Dying, if they are not already Dying or
   539  // Dead. It's expected to be used when a filesystem is destroyed.
   540  func (st *State) cleanupAttachmentsForDyingFilesystem(filesystemId string) (err error) {
   541  	filesystemTag := names.NewFilesystemTag(filesystemId)
   542  
   543  	// This won't miss attachments, because a Dying filesystem cannot have
   544  	// attachments added to it. But we do have to remove the attachments
   545  	// themselves via individual transactions, because they could be in
   546  	// any state at all.
   547  	coll, closer := st.getCollection(filesystemAttachmentsC)
   548  	defer closer()
   549  
   550  	var doc filesystemAttachmentDoc
   551  	fields := bson.D{{"machineid", 1}}
   552  	iter := coll.Find(bson.D{{"filesystemid", filesystemId}}).Select(fields).Iter()
   553  	defer closeIter(iter, &err, "reading filesystem attachment document")
   554  	for iter.Next(&doc) {
   555  		machineTag := names.NewMachineTag(doc.Machine)
   556  		if err := st.DetachFilesystem(machineTag, filesystemTag); err != nil {
   557  			return errors.Annotate(err, "destroying filesystem attachment")
   558  		}
   559  	}
   560  	return nil
   561  }
   562  
   563  func closeIter(iter *mgo.Iter, errOut *error, message string) {
   564  	err := iter.Close()
   565  	if err == nil {
   566  		return
   567  	}
   568  	err = errors.Annotate(err, message)
   569  	if *errOut == nil {
   570  		*errOut = err
   571  		return
   572  	}
   573  	logger.Errorf("%v", err)
   574  }