github.com/makyo/juju@v0.0.0-20160425123129-2608902037e9/state/cleanup.go (about) 1 // Copyright 2014 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package state 5 6 import ( 7 "fmt" 8 "strings" 9 10 "github.com/juju/errors" 11 "github.com/juju/names" 12 "gopkg.in/mgo.v2" 13 "gopkg.in/mgo.v2/bson" 14 "gopkg.in/mgo.v2/txn" 15 ) 16 17 type cleanupKind string 18 19 const ( 20 // SCHEMACHANGE: the names are expressive, the values not so much. 21 cleanupRelationSettings cleanupKind = "settings" 22 cleanupUnitsForDyingService cleanupKind = "units" 23 cleanupDyingUnit cleanupKind = "dyingUnit" 24 cleanupRemovedUnit cleanupKind = "removedUnit" 25 cleanupServicesForDyingModel cleanupKind = "services" 26 cleanupDyingMachine cleanupKind = "dyingMachine" 27 cleanupForceDestroyedMachine cleanupKind = "machine" 28 cleanupAttachmentsForDyingStorage cleanupKind = "storageAttachments" 29 cleanupAttachmentsForDyingVolume cleanupKind = "volumeAttachments" 30 cleanupAttachmentsForDyingFilesystem cleanupKind = "filesystemAttachments" 31 cleanupModelsForDyingController cleanupKind = "models" 32 cleanupMachinesForDyingModel cleanupKind = "modelMachines" 33 ) 34 35 // cleanupDoc represents a potentially large set of documents that should be 36 // removed. 37 type cleanupDoc struct { 38 DocID string `bson:"_id"` 39 ModelUUID string `bson:"model-uuid"` 40 Kind cleanupKind 41 Prefix string 42 } 43 44 // newCleanupOp returns a txn.Op that creates a cleanup document with a unique 45 // id and the supplied kind and prefix. 46 func (st *State) newCleanupOp(kind cleanupKind, prefix string) txn.Op { 47 doc := &cleanupDoc{ 48 DocID: st.docID(fmt.Sprint(bson.NewObjectId())), 49 ModelUUID: st.ModelUUID(), 50 Kind: kind, 51 Prefix: prefix, 52 } 53 return txn.Op{ 54 C: cleanupsC, 55 Id: doc.DocID, 56 Insert: doc, 57 } 58 } 59 60 // NeedsCleanup returns true if documents previously marked for removal exist. 61 func (st *State) NeedsCleanup() (bool, error) { 62 cleanups, closer := st.getCollection(cleanupsC) 63 defer closer() 64 count, err := cleanups.Count() 65 if err != nil { 66 return false, err 67 } 68 return count > 0, nil 69 } 70 71 // Cleanup removes all documents that were previously marked for removal, if 72 // any such exist. It should be called periodically by at least one element 73 // of the system. 74 func (st *State) Cleanup() (err error) { 75 var doc cleanupDoc 76 cleanups, closer := st.getCollection(cleanupsC) 77 defer closer() 78 iter := cleanups.Find(nil).Iter() 79 defer closeIter(iter, &err, "reading cleanup document") 80 for iter.Next(&doc) { 81 var err error 82 logger.Debugf("running %q cleanup: %q", doc.Kind, doc.Prefix) 83 switch doc.Kind { 84 case cleanupRelationSettings: 85 err = st.cleanupRelationSettings(doc.Prefix) 86 case cleanupUnitsForDyingService: 87 err = st.cleanupUnitsForDyingService(doc.Prefix) 88 case cleanupDyingUnit: 89 err = st.cleanupDyingUnit(doc.Prefix) 90 case cleanupRemovedUnit: 91 err = st.cleanupRemovedUnit(doc.Prefix) 92 case cleanupServicesForDyingModel: 93 err = st.cleanupServicesForDyingModel() 94 case cleanupDyingMachine: 95 err = st.cleanupDyingMachine(doc.Prefix) 96 case cleanupForceDestroyedMachine: 97 err = st.cleanupForceDestroyedMachine(doc.Prefix) 98 case cleanupAttachmentsForDyingStorage: 99 err = st.cleanupAttachmentsForDyingStorage(doc.Prefix) 100 case cleanupAttachmentsForDyingVolume: 101 err = st.cleanupAttachmentsForDyingVolume(doc.Prefix) 102 case cleanupAttachmentsForDyingFilesystem: 103 err = st.cleanupAttachmentsForDyingFilesystem(doc.Prefix) 104 case cleanupModelsForDyingController: 105 err = st.cleanupModelsForDyingController() 106 case cleanupMachinesForDyingModel: 107 err = st.cleanupMachinesForDyingModel() 108 default: 109 handler, ok := cleanupHandlers[doc.Kind] 110 if !ok { 111 err = fmt.Errorf("unknown cleanup kind %q", doc.Kind) 112 } else { 113 persist := st.newPersistence() 114 err = handler(st, persist, doc.Prefix) 115 } 116 } 117 if err != nil { 118 logger.Warningf("cleanup failed: %v", err) 119 continue 120 } 121 ops := []txn.Op{{ 122 C: cleanupsC, 123 Id: doc.DocID, 124 Remove: true, 125 }} 126 if err := st.runTransaction(ops); err != nil { 127 logger.Warningf("cannot remove empty cleanup document: %v", err) 128 } 129 } 130 return nil 131 } 132 133 // CleanupHandler is a function that state may call during cleanup 134 // to perform cleanup actions for some cleanup type. 135 type CleanupHandler func(st *State, persist Persistence, prefix string) error 136 137 var cleanupHandlers = map[cleanupKind]CleanupHandler{} 138 139 // RegisterCleanupHandler identifies the handler to use a given 140 // cleanup kind. 141 func RegisterCleanupHandler(kindStr string, handler CleanupHandler) error { 142 kind := cleanupKind(kindStr) 143 if _, ok := cleanupHandlers[kind]; ok { 144 return errors.NewAlreadyExists(nil, fmt.Sprintf("cleanup handler for %q already registered", kindStr)) 145 } 146 cleanupHandlers[kind] = handler 147 return nil 148 } 149 150 func (st *State) cleanupRelationSettings(prefix string) error { 151 settings, closer := st.getCollection(settingsC) 152 defer closer() 153 // Documents marked for cleanup are not otherwise referenced in the 154 // system, and will not be under watch, and are therefore safe to 155 // delete directly. 156 settingsW := settings.Writeable() 157 158 sel := bson.D{{"_id", bson.D{{"$regex", "^" + st.docID(prefix)}}}} 159 if count, err := settingsW.Find(sel).Count(); err != nil { 160 return fmt.Errorf("cannot detect cleanup targets: %v", err) 161 } else if count != 0 { 162 if _, err := settingsW.RemoveAll(sel); err != nil { 163 return fmt.Errorf("cannot remove documents marked for cleanup: %v", err) 164 } 165 } 166 return nil 167 } 168 169 // cleanupModelsForDyingController sets all models to dying, if 170 // they are not already Dying or Dead. It's expected to be used when a 171 // controller is destroyed. 172 func (st *State) cleanupModelsForDyingController() (err error) { 173 models, err := st.AllModels() 174 if err != nil { 175 return errors.Trace(err) 176 } 177 for _, env := range models { 178 179 if env.Life() == Alive { 180 if err := env.Destroy(); err != nil { 181 return errors.Trace(err) 182 } 183 } 184 } 185 return nil 186 } 187 188 // cleanupMachinesForDyingModel sets all non-manager, non-manual 189 // machines to Dying, if they are not already Dying or Dead. It's expected to 190 // be used when a model is destroyed. 191 func (st *State) cleanupMachinesForDyingModel() (err error) { 192 // This won't miss machines, because a Dying model cannot have 193 // machines added to it. But we do have to remove the machines themselves 194 // via individual transactions, because they could be in any state at all. 195 machines, err := st.AllMachines() 196 if err != nil { 197 return errors.Trace(err) 198 } 199 for _, m := range machines { 200 if m.IsManager() { 201 continue 202 } 203 if _, isContainer := m.ParentId(); isContainer { 204 continue 205 } 206 manual, err := m.IsManual() 207 if err != nil { 208 return err 209 } else if manual { 210 continue 211 } 212 err = m.ForceDestroy() 213 if err != nil { 214 return errors.Trace(err) 215 } 216 } 217 return nil 218 } 219 220 // cleanupServicesForDyingModel sets all services to Dying, if they are 221 // not already Dying or Dead. It's expected to be used when a model is 222 // destroyed. 223 func (st *State) cleanupServicesForDyingModel() (err error) { 224 // This won't miss services, because a Dying model cannot have 225 // services added to it. But we do have to remove the services themselves 226 // via individual transactions, because they could be in any state at all. 227 services, closer := st.getCollection(servicesC) 228 defer closer() 229 service := Service{st: st} 230 sel := bson.D{{"life", Alive}} 231 iter := services.Find(sel).Iter() 232 defer closeIter(iter, &err, "reading service document") 233 for iter.Next(&service.doc) { 234 if err := service.Destroy(); err != nil { 235 return err 236 } 237 } 238 return nil 239 } 240 241 // cleanupUnitsForDyingService sets all units with the given prefix to Dying, 242 // if they are not already Dying or Dead. It's expected to be used when a 243 // service is destroyed. 244 func (st *State) cleanupUnitsForDyingService(serviceName string) (err error) { 245 // This won't miss units, because a Dying service cannot have units added 246 // to it. But we do have to remove the units themselves via individual 247 // transactions, because they could be in any state at all. 248 units, closer := st.getCollection(unitsC) 249 defer closer() 250 251 // TODO(mjs) - remove this post v1.21 252 // Older versions of the code put a trailing forward slash on the 253 // end of the service name. Remove it here in case a pre-upgrade 254 // cleanup document is seen. 255 serviceName = strings.TrimSuffix(serviceName, "/") 256 257 unit := Unit{st: st} 258 sel := bson.D{{"service", serviceName}, {"life", Alive}} 259 iter := units.Find(sel).Iter() 260 defer closeIter(iter, &err, "reading unit document") 261 for iter.Next(&unit.doc) { 262 if err := unit.Destroy(); err != nil { 263 return err 264 } 265 } 266 return nil 267 } 268 269 // cleanupDyingUnit marks resources owned by the unit as dying, to ensure 270 // they are cleaned up as well. 271 func (st *State) cleanupDyingUnit(name string) error { 272 unit, err := st.Unit(name) 273 if errors.IsNotFound(err) { 274 return nil 275 } else if err != nil { 276 return err 277 } 278 // Mark the unit as departing from its joined relations, allowing 279 // related units to start converging to a state in which that unit 280 // is gone as quickly as possible. 281 relations, err := unit.RelationsJoined() 282 if err != nil { 283 return err 284 } 285 for _, relation := range relations { 286 relationUnit, err := relation.Unit(unit) 287 if errors.IsNotFound(err) { 288 continue 289 } else if err != nil { 290 return err 291 } 292 if err := relationUnit.PrepareLeaveScope(); err != nil { 293 return err 294 } 295 } 296 // Mark storage attachments as dying, so that they are detached 297 // and removed from state, allowing the unit to terminate. 298 storageAttachments, err := st.UnitStorageAttachments(unit.UnitTag()) 299 if err != nil { 300 return err 301 } 302 for _, storageAttachment := range storageAttachments { 303 err := st.DestroyStorageAttachment( 304 storageAttachment.StorageInstance(), unit.UnitTag(), 305 ) 306 if errors.IsNotFound(err) { 307 continue 308 } else if err != nil { 309 return err 310 } 311 } 312 return nil 313 } 314 315 // cleanupRemovedUnit takes care of all the final cleanup required when 316 // a unit is removed. 317 func (st *State) cleanupRemovedUnit(unitId string) error { 318 actions, err := st.matchingActionsByReceiverId(unitId) 319 if err != nil { 320 return err 321 } 322 323 cancelled := ActionResults{Status: ActionCancelled, Message: "unit removed"} 324 for _, action := range actions { 325 if _, err = action.Finish(cancelled); err != nil { 326 return err 327 } 328 } 329 return nil 330 } 331 332 // cleanupDyingMachine marks resources owned by the machine as dying, to ensure 333 // they are cleaned up as well. 334 func (st *State) cleanupDyingMachine(machineId string) error { 335 machine, err := st.Machine(machineId) 336 if errors.IsNotFound(err) { 337 return nil 338 } else if err != nil { 339 return err 340 } 341 return cleanupDyingMachineResources(machine) 342 } 343 344 // cleanupForceDestroyedMachine systematically destroys and removes all entities 345 // that depend upon the supplied machine, and removes the machine from state. It's 346 // expected to be used in response to destroy-machine --force. 347 func (st *State) cleanupForceDestroyedMachine(machineId string) error { 348 machine, err := st.Machine(machineId) 349 if errors.IsNotFound(err) { 350 return nil 351 } else if err != nil { 352 return err 353 } 354 if err := cleanupDyingMachineResources(machine); err != nil { 355 return err 356 } 357 // In an ideal world, we'd call machine.Destroy() here, and thus prevent 358 // new dependencies being added while we clean up the ones we know about. 359 // But machine destruction is unsophisticated, and doesn't allow for 360 // destruction while dependencies exist; so we just have to deal with that 361 // possibility below. 362 if err := st.cleanupContainers(machine); err != nil { 363 return err 364 } 365 for _, unitName := range machine.doc.Principals { 366 if err := st.obliterateUnit(unitName); err != nil { 367 return err 368 } 369 } 370 // We need to refresh the machine at this point, because the local copy 371 // of the document will not reflect changes caused by the unit cleanups 372 // above, and may thus fail immediately. 373 if err := machine.Refresh(); errors.IsNotFound(err) { 374 return nil 375 } else if err != nil { 376 return err 377 } 378 // TODO(fwereade): 2013-11-11 bug 1250104 379 // If this fails, it's *probably* due to a race in which new dependencies 380 // were added while we cleaned up the old ones. If the cleanup doesn't run 381 // again -- which it *probably* will anyway -- the issue can be resolved by 382 // force-destroying the machine again; that's better than adding layer 383 // upon layer of complication here. 384 if err := machine.EnsureDead(); err != nil { 385 return err 386 } 387 removePortsOps, err := machine.removePortsOps() 388 if err != nil { 389 return err 390 } 391 return st.runTransaction(removePortsOps) 392 393 // Note that we do *not* remove the machine entirely: we leave it for the 394 // provisioner to clean up, so that we don't end up with an unreferenced 395 // instance that would otherwise be ignored when in provisioner-safe-mode. 396 } 397 398 // cleanupContainers recursively calls cleanupForceDestroyedMachine on the supplied 399 // machine's containers, and removes them from state entirely. 400 func (st *State) cleanupContainers(machine *Machine) error { 401 containerIds, err := machine.Containers() 402 if errors.IsNotFound(err) { 403 return nil 404 } else if err != nil { 405 return err 406 } 407 for _, containerId := range containerIds { 408 if err := st.cleanupForceDestroyedMachine(containerId); err != nil { 409 return err 410 } 411 container, err := st.Machine(containerId) 412 if errors.IsNotFound(err) { 413 return nil 414 } else if err != nil { 415 return err 416 } 417 if err := container.Remove(); err != nil { 418 return err 419 } 420 } 421 return nil 422 } 423 424 func cleanupDyingMachineResources(m *Machine) error { 425 volumeAttachments, err := m.st.MachineVolumeAttachments(m.MachineTag()) 426 if err != nil { 427 return errors.Annotate(err, "getting machine volume attachments") 428 } 429 for _, va := range volumeAttachments { 430 if err := m.st.DetachVolume(va.Machine(), va.Volume()); err != nil { 431 if IsContainsFilesystem(err) { 432 // The volume will be destroyed when the 433 // contained filesystem is removed, whose 434 // destruction is initiated below. 435 continue 436 } 437 return errors.Trace(err) 438 } 439 } 440 filesystemAttachments, err := m.st.MachineFilesystemAttachments(m.MachineTag()) 441 if err != nil { 442 return errors.Annotate(err, "getting machine filesystem attachments") 443 } 444 for _, fsa := range filesystemAttachments { 445 if err := m.st.DetachFilesystem(fsa.Machine(), fsa.Filesystem()); err != nil { 446 return errors.Trace(err) 447 } 448 } 449 return nil 450 } 451 452 // obliterateUnit removes a unit from state completely. It is not safe or 453 // sane to obliterate any unit in isolation; its only reasonable use is in 454 // the context of machine obliteration, in which we can be sure that unclean 455 // shutdown of units is not going to leave a machine in a difficult state. 456 func (st *State) obliterateUnit(unitName string) error { 457 unit, err := st.Unit(unitName) 458 if errors.IsNotFound(err) { 459 return nil 460 } else if err != nil { 461 return err 462 } 463 // Unlike the machine, we *can* always destroy the unit, and (at least) 464 // prevent further dependencies being added. If we're really lucky, the 465 // unit will be removed immediately. 466 if err := unit.Destroy(); err != nil { 467 return errors.Annotatef(err, "cannot destroy unit %q", unitName) 468 } 469 if err := unit.Refresh(); errors.IsNotFound(err) { 470 return nil 471 } else if err != nil { 472 return err 473 } 474 for _, subName := range unit.SubordinateNames() { 475 if err := st.obliterateUnit(subName); err != nil { 476 return err 477 } 478 } 479 if err := unit.EnsureDead(); err != nil { 480 return err 481 } 482 return unit.Remove() 483 } 484 485 // cleanupAttachmentsForDyingStorage sets all storage attachments related 486 // to the specified storage instance to Dying, if they are not already Dying 487 // or Dead. It's expected to be used when a storage instance is destroyed. 488 func (st *State) cleanupAttachmentsForDyingStorage(storageId string) (err error) { 489 storageTag := names.NewStorageTag(storageId) 490 491 // This won't miss attachments, because a Dying storage instance cannot 492 // have attachments added to it. But we do have to remove the attachments 493 // themselves via individual transactions, because they could be in 494 // any state at all. 495 coll, closer := st.getCollection(storageAttachmentsC) 496 defer closer() 497 498 var doc storageAttachmentDoc 499 fields := bson.D{{"unitid", 1}} 500 iter := coll.Find(bson.D{{"storageid", storageId}}).Select(fields).Iter() 501 defer closeIter(iter, &err, "reading storage attachment document") 502 for iter.Next(&doc) { 503 unitTag := names.NewUnitTag(doc.Unit) 504 if err := st.DestroyStorageAttachment(storageTag, unitTag); err != nil { 505 return errors.Annotate(err, "destroying storage attachment") 506 } 507 } 508 return nil 509 } 510 511 // cleanupAttachmentsForDyingVolume sets all volume attachments related 512 // to the specified volume to Dying, if they are not already Dying or 513 // Dead. It's expected to be used when a volume is destroyed. 514 func (st *State) cleanupAttachmentsForDyingVolume(volumeId string) (err error) { 515 volumeTag := names.NewVolumeTag(volumeId) 516 517 // This won't miss attachments, because a Dying volume cannot have 518 // attachments added to it. But we do have to remove the attachments 519 // themselves via individual transactions, because they could be in 520 // any state at all. 521 coll, closer := st.getCollection(volumeAttachmentsC) 522 defer closer() 523 524 var doc volumeAttachmentDoc 525 fields := bson.D{{"machineid", 1}} 526 iter := coll.Find(bson.D{{"volumeid", volumeId}}).Select(fields).Iter() 527 defer closeIter(iter, &err, "reading volume attachment document") 528 for iter.Next(&doc) { 529 machineTag := names.NewMachineTag(doc.Machine) 530 if err := st.DetachVolume(machineTag, volumeTag); err != nil { 531 return errors.Annotate(err, "destroying volume attachment") 532 } 533 } 534 return nil 535 } 536 537 // cleanupAttachmentsForDyingFilesystem sets all filesystem attachments related 538 // to the specified filesystem to Dying, if they are not already Dying or 539 // Dead. It's expected to be used when a filesystem is destroyed. 540 func (st *State) cleanupAttachmentsForDyingFilesystem(filesystemId string) (err error) { 541 filesystemTag := names.NewFilesystemTag(filesystemId) 542 543 // This won't miss attachments, because a Dying filesystem cannot have 544 // attachments added to it. But we do have to remove the attachments 545 // themselves via individual transactions, because they could be in 546 // any state at all. 547 coll, closer := st.getCollection(filesystemAttachmentsC) 548 defer closer() 549 550 var doc filesystemAttachmentDoc 551 fields := bson.D{{"machineid", 1}} 552 iter := coll.Find(bson.D{{"filesystemid", filesystemId}}).Select(fields).Iter() 553 defer closeIter(iter, &err, "reading filesystem attachment document") 554 for iter.Next(&doc) { 555 machineTag := names.NewMachineTag(doc.Machine) 556 if err := st.DetachFilesystem(machineTag, filesystemTag); err != nil { 557 return errors.Annotate(err, "destroying filesystem attachment") 558 } 559 } 560 return nil 561 } 562 563 func closeIter(iter *mgo.Iter, errOut *error, message string) { 564 err := iter.Close() 565 if err == nil { 566 return 567 } 568 err = errors.Annotate(err, message) 569 if *errOut == nil { 570 *errOut = err 571 return 572 } 573 logger.Errorf("%v", err) 574 }